def get(self, owner, repo, ver=None):
    """Serve the stored analysis JSON for a library version.

    Responds 404 when the library, version, or analysis content is
    missing; 400 when the analysis is not in the ready state.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    owner = owner.lower()
    repo = repo.lower()
    library_key = ndb.Key(Library, Library.id(owner, repo))
    if ver is None:
        ver = yield Library.latest_version_for_key_async(library_key)
    if ver is None:
        self.response.set_status(404)
        return
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)
    analysis = Content.get_by_id('analysis', parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if analysis is None:
        self.response.set_status(404)
        return
    # Fix: the original set this header twice; once is enough.
    self.response.headers['Content-Type'] = 'application/json'
    result = {}
    result['status'] = analysis.status
    if analysis.status == Status.ready:
        result['content'] = json.loads(analysis.content)
    if analysis.status == Status.error:
        result['error'] = analysis.error
    if result['status'] != Status.ready:
        self.response.set_status(400)
    self.response.write(json.dumps(result))
def get(self, owner, repo, version=None):
    """List resolved dependency metadata for a version's bower.json.

    Responds 404 when the version or its stored bower content is missing;
    otherwise writes {'results': [...], 'count': N} as JSON.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'
    library_key = ndb.Key(Library, Library.id(owner, repo))
    if version is None:
        version = yield Library.default_version_for_key_async(library_key)
    if version is None:
        self.response.set_status(404)
        return
    version_key = ndb.Key(Library, library_key.id(), Version, version)
    bower = yield Content.get_by_id_async('bower', parent=version_key)
    if bower is None:
        self.response.set_status(404)
        return
    bower_json = bower.get_json()
    bower_dependencies = bower_json.get('dependencies', {})
    dependencies = []
    version_futures = []
    # Fan out: start a version-list fetch for every parseable dependency.
    for name in bower_dependencies.keys():
        dependency = Dependency.from_string(bower_dependencies[name])
        if dependency is None:
            continue
        dependencies.append(dependency)
        dependency_library_key = ndb.Key(Library, Library.id(dependency.owner, dependency.repo))
        version_futures.append(Library.versions_for_key_async(dependency_library_key))
    dependency_futures = []
    for i, dependency in enumerate(dependencies):
        versions = yield version_futures[i]
        def matches(version, spec):
            try:
                return versiontag.match(version, spec)
            except ValueError:
                # FIXME: What other cases do we need to support here?
                return False
        # versions appears newest-last: pop until the newest matching the
        # dependency's semver spec remains.
        while len(versions) > 0 and not matches(versions[-1], dependency.version):
            versions.pop()
        if len(versions) > 0:
            # NOTE(review): owner/repo are lowercased here but not for the
            # versions lookup above — confirm Library.id normalizes case.
            dependency_library_key = ndb.Key(Library, Library.id(dependency.owner.lower(), dependency.repo.lower()))
            dependency_futures.append(LibraryMetadata.brief_async(dependency_library_key, versions[-1]))
    results = []
    for future in dependency_futures:
        dependency_result = yield future
        if dependency_result is not None:
            results.append(dependency_result)
    result = {
        'results': results,
        'count': len(results),
    }
    self.response.write(json.dumps(result))
def update_metadata(self):
    """Refresh repo metadata, contributors, and participation from GitHub.

    Stored ETags are sent so unchanged data (HTTP 304) is skipped.
    Renamed repos are deleted locally and re-ingested under the new id;
    deleted repos are removed. Raises RequestAborted to stop the update,
    or retries on unexpected status codes.
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata")
        # GitHub serves renamed repos under the old URL; detect the rename
        # by comparing the canonical name in the response.
        repo = metadata.get('name', '').lower()
        owner = metadata.get('owner', {}).get('login', '').lower()
        if repo != '' and owner != '' and (repo != self.repo or owner != self.owner):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(owner, repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(owner, repo))
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors")
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # BUG FIX: the path segment was 'stats/participation ' (trailing
    # space), producing a malformed GitHub API URL.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation")
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def init_library(self, owner, repo, create=True):
    """Load (or create) the Library entity for owner/repo onto self.

    Normalizes owner/repo to lowercase and uses the normalized values
    consistently for the entity id. Raises RequestAborted when the
    library is suppressed. With create=False the library may end up
    None; callers are expected to check.
    """
    self.owner = owner.lower()
    self.repo = repo.lower()
    if create:
        self.library = Library.get_or_insert(Library.id(self.owner, self.repo))
        self.is_new = self.library.metadata is None and self.library.error is None
    else:
        self.library = Library.get_by_id(Library.id(self.owner, self.repo))
    # BUG FIX: get_by_id can return None when create=False; the original
    # unconditionally read .status and crashed before callers could
    # perform their own None check.
    if self.library is not None and self.library.status == Status.suppressed:
        raise RequestAborted('library is suppressed')
def init_library(self, scope, package, create=True):
    """Load (or create) the Library entity for an npm scope/package.

    Raises RequestAborted when the library is suppressed. With
    create=False the library may end up None; callers must check.
    """
    self.scope = scope.lower()
    self.package = package.lower()
    if create:
        self.library = Library.get_or_insert(Library.id(self.scope, self.package))
        self.is_new = self.library.metadata is None and self.library.error is None
    else:
        self.library = Library.get_by_id(Library.id(self.scope, self.package))
    # BUG FIX: get_by_id can return None when create=False; guard the
    # status read so callers can handle the missing-library case.
    if self.library is not None and self.library.status == Status.suppressed:
        raise RequestAborted('library is suppressed')
def handle_get(self, owner, repo, latest=False):
    """Queue analysis for the default version (latest=True) or every ready version."""
    self.init_library(owner, repo)
    if self.library is None:
        self.response.set_status(404)
        self.response.write('could not find library: %s' % Library.id(owner, repo))
        return
    if not latest:
        # Re-analyze every version that completed ingestion.
        ready_versions = Version.query(Version.status == Status.ready, ancestor=self.library.key).fetch()
        for entry in ready_versions:
            self.trigger_analysis(entry.key.id(), entry.sha, transactional=False)
        return
    default_id = Library.default_version_for_key_async(self.library.key).get_result()
    if not default_id:
        return
    default_version = Version.get_by_id(default_id, parent=self.library.key)
    if default_version is not None:
        self.trigger_analysis(default_id, default_version.sha, transactional=False)
def update_readme(self, is_npm_package):
    """Fetch, store, and render the README for self.owner/self.repo.

    For npm packages the readme path comes from registry metadata and the
    file is fetched from unpkg; otherwise it comes from the GitHub readme
    endpoint. Stores the raw markdown and the rendered HTML as Content
    children of self.version_key.
    """
    if is_npm_package:
        # Load registry metadata to fetch readme path.
        library = Library.get_by_id(Library.id(self.owner, self.repo))
        registry_metadata = json.loads(library.registry_metadata) if library.registry_metadata else None
        # NOTE(review): registry_metadata may be None here, which would
        # raise on .get — confirm npm packages always have registry metadata.
        readme_path = registry_metadata.get('readmeFilename', 'README.md')
        response = util.unpkg_get(self.owner, self.repo, self.version, readme_path)
        readme = response.content
    else:
        # Load readme from GitHub endpoint.
        response = util.github_get('repos', self.owner, self.repo, 'readme', params={"ref": self.sha})
        if response.status_code == 200:
            readme = base64.b64decode(json.loads(response.content)['content'])
        elif response.status_code == 404:
            # A missing README is not an error; skip storage below.
            readme = None
        else:
            return self.retry('error fetching readme (%d)' % response.status_code)
    if readme is not None:
        # Store the raw readme markdown.
        try:
            Content(parent=self.version_key, id='readme', content=readme, status=Status.ready, etag=response.headers.get('ETag', None)).put()
        except db.BadValueError:
            return self.error("Could not store README.md as a utf-8 string", ErrorCodes.Version_utf)
        # Convert markdown to HTML and store the result.
        response = util.github_markdown(readme)
        if response.status_code == 200:
            Content(parent=self.version_key, id='readme.html', content=response.content, status=Status.ready, etag=response.headers.get('ETag', None)).put()
        else:
            return self.retry('error converting readme to markdown (%d)' % response.status_code)
def get(self, terms):
    """Search the 'repo' index for libraries matching *terms*.

    Query params: noscore (disable match scoring), noresults (omit
    results; implies count), count (include total count), cursor
    (continuation token), limit (capped at 20).
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    scoring = self.request.get('noscore', None) is None
    include_results = self.request.get('noresults', None) is None
    include_count = self.request.get('count', None) is not None
    request_cursor = self.request.get('cursor', None)
    if not include_results:
        # A count-only request never needs scoring but always needs the count.
        scoring = False
        include_count = True
    try:
        limit = min(20, int(self.request.get('limit', 20)))
    except ValueError:
        self.response.set_status(400)
        return
    index = search.Index('repo')
    cursor = search.Cursor(web_safe_string=request_cursor)
    try:
        # Accuracy refers to accurate till n results.
        accuracy = 2000 if include_count else None
        sort_options = search.SortOptions(match_scorer=search.MatchScorer()) if scoring else None
        query_options = search.QueryOptions(limit=limit, number_found_accuracy=accuracy, sort_options=sort_options, cursor=cursor)
        search_results = index.search(search.Query(query_string=terms, options=query_options))
        cursor = search_results.cursor
    except search.QueryError:
        self.response.set_status(400)
        self.response.write('bad query')
        return
    count = search_results.number_found
    if include_results:
        result_futures = []
        # Doc ids are 'owner/repo'; the indexed 'version' field pins the
        # metadata lookup to the version that was indexed.
        for result in search_results.results:
            (owner, repo) = result.doc_id.split('/')
            version = None
            for field in result.fields:
                if field.name == 'version':
                    version = field.value
                    break
            library_key = ndb.Key(Library, Library.id(owner, repo))
            result_futures.append(LibraryMetadata.brief_async(library_key, version, assume_latest=True))
        results = []
        for future in result_futures:
            result = yield future
            if result is None:
                # Fixup count when we skip over incomplete entries.
                count = count - 1
            if result is not None:
                results.append(result)
    result = {
        'cursor': cursor.web_safe_string if cursor and include_results else None,
    }
    if include_count:
        result['count'] = count
    if include_results:
        result['results'] = results
    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(result))
def get(self, owner, repo, version=None):
    """Return brief metadata for every collection containing this version."""
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'
    library_key = ndb.Key(Library, Library.id(owner, repo))
    if version is None:
        version = yield Library.default_version_for_key_async(library_key)
    if version is None:
        self.response.set_status(404)
        return
    version_key = ndb.Key(Library, library_key.id(), Version, version)
    memberships = yield Version.collections_for_key_async(version_key)
    # Fan out the metadata lookups, then gather the non-empty results.
    futures = [LibraryMetadata.brief_async(member.key.parent(), member.key.id())
               for member in memberships]
    collections = []
    for future in futures:
        brief = yield future
        if brief is not None:
            collections.append(brief)
    self.response.write(json.dumps({
        'results': collections,
        'count': len(collections),
    }))
def handle_post(self):
    """Store an analysis result delivered via a Pub/Sub push message."""
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    payload = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
        return
    # An empty payload clears any previously stored analysis.
    content.set_json(json.loads(payload) if payload != '' else None)
    if error is None:
        content.status = Status.ready
        content.error = None
    else:
        content.status = Status.error
        content.error = error
    content.put()
    # When this is the library's default version, refresh the search indexes.
    default_id = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_id:
        task_url = util.update_indexes_task(owner, repo)
        util.new_task(task_url, target='manage')
def update_search_index(self, owner, repo, version_key, library, bower):
    """Build and store the weighted search document for a library version.

    Combines GitHub metadata, bower.json, npm registry metadata, and
    analysis results (analyzer format preferred, hydrolysis as fallback)
    into one search document ranked by the library's update time.
    """
    metadata = json.loads(library.metadata)
    registry_metadata = json.loads(library.registry_metadata) if library.registry_metadata else None
    npm_description = registry_metadata.get('description', '') if registry_metadata else ''
    npm_keywords = registry_metadata.get('keywords', []) if registry_metadata else []
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='npm_description', value=npm_description),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='npm_keywords', value=' '.join(npm_keywords)),
        search.TextField(name='prefix_matches', value=' '.join(util.generate_prefixes_from_list(
            util.safe_split_strip(metadata.get('description')) +
            util.safe_split_strip(bower.get('description')) +
            util.safe_split_strip(repo)))),
    ]
    # Generate weighting field
    weights = [(repo, 10)]
    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
        data = analysis.get_json()
        if data.get('analyzerData', None) is not None:
            # Use analyzer data for search index
            element_objects = data.get('analyzerData', {}).get('elements', [])
            elements = [element.get('tagname', '') or element.get('classname', '') for element in element_objects]
            if elements != []:
                fields.append(search.TextField(name='element', value=' '.join(elements)))
                weights.append((' '.join(elements), 5))
            behavior_objects = data.get('analyzerData', {}).get('metadata', {}).get('polymer', {}).get('behaviors', [])
            behaviors = [behavior.get('name', '') for behavior in behavior_objects]
            if behaviors != []:
                fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
                weights.append((' '.join(behaviors), 5))
        else:
            # Use hydrolysis data for search index
            elements = data.get('elementsByTagName', {}).keys()
            if elements != []:
                fields.append(search.TextField(name='element', value=' '.join(elements)))
                weights.append((' '.join(elements), 5))
            behaviors = data.get('behaviorsByName', {}).keys()
            if behaviors != []:
                fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
                weights.append((' '.join(behaviors), 5))
    # Repeat each weighted value `weight` times so the search scorer
    # effectively boosts it.
    weighted = []
    for value, weight in weights:
        for _ in range(0, weight):
            weighted.append(value)
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))
    # Rank documents by recency: seconds since 2016-01-01.
    rank = int((library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    index = search.Index('repo')
    index.put(document)
def handle_get(self, owner, repo, version):
    """Ingest one version: fetch readme and bower data, then mark it ready."""
    self.owner = owner
    self.repo = repo
    self.version = version
    parent_key = ndb.Key(Library, Library.id(owner, repo))
    self.version_object = Version.get_by_id(version, parent=parent_key)
    if self.version_object is None:
        return self.error('Version entity does not exist: %s/%s' % (Library.id(owner, repo), version))
    self.sha = self.version_object.sha
    self.version_key = self.version_object.key
    # Fetch content, then flip the version's status to ready.
    self.update_readme()
    self.update_bower()
    self.set_ready()
def handle_get(self, owner, repo, version):
    """Delete a version entity together with all of its descendants."""
    # FIXME: Make deletion transactional with check on library that tag is excluded.
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    doomed_keys = ndb.Query(ancestor=version_key).iter(keys_only=True)
    ndb.delete_multi(doomed_keys)
    # Only refresh the indexes when the version cache actually changed.
    if VersionCache.update(version_key.parent()):
        util.new_task(util.update_indexes_task(owner, repo), target='manage')
def handle_get(self, scope, package, version):
    """Delete an npm package version entity and all of its descendants."""
    # FIXME: Make deletion transactional with check on library that tag is excluded.
    version_key = ndb.Key(Library, Library.id(scope, package), Version, version)
    doomed_keys = ndb.Query(ancestor=version_key).iter(keys_only=True)
    ndb.delete_multi(doomed_keys)
    # Only refresh the indexes when the version cache actually changed.
    if VersionCache.update(version_key.parent()):
        util.new_task(util.update_indexes_task(scope, package), target='manage')
def handle_get(self, owner, repo, scope, package):
    """Record a bower library's migration to an npm package."""
    library = Library.get_by_id(Library.id(owner, repo))
    if library is None:
        return
    library.npm_package = '/'.join((scope, package))
    library.put()
    # Remove from search indexes.
    search.Index('repo').delete(Library.id(owner, repo))
    # Flag the npm-side entity, if it already exists.
    npm_library = Library.get_by_id(Library.id(scope, package))
    if npm_library is not None:
        npm_library.migrated_from_bower = True
        npm_library.put()
def test_ensure_when_present(self):
    """The ensure task for an already-present library enqueues nothing."""
    Library(id=Library.id('owner', 'repo')).put()
    response = self.app.get(
        util.ensure_library_task('owner', 'repo'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)
    queued = self.tasks.get_filtered_tasks()
    self.assertEqual([], [task.url for task in queued])
def get(self, owner, repo, ver=None):
    """Serve analysis JSON, preferring analyzer-format data when the
    ?use_analyzer_data query parameter is present and the data exists.

    Responds 404 when the version or analysis is missing, 400 when the
    analysis is not ready.
    """
    use_analyzer_data = self.request.get('use_analyzer_data', None) is not None
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    owner = owner.lower()
    repo = repo.lower()
    library_key = ndb.Key(Library, Library.id(owner, repo))
    if ver is None:
        ver = yield Library.default_version_for_key_async(library_key)
    if ver is None:
        self.response.set_status(404)
        return
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)
    analysis = Content.get_by_id('analysis', parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if analysis is None:
        self.response.set_status(404)
        return
    self.response.headers['Content-Type'] = 'application/json'
    result = {}
    result['status'] = analysis.status
    if analysis.status == Status.ready:
        content = analysis.get_json()
        has_analyzer_data = content.get('analyzerData', None) is not None
        if use_analyzer_data and has_analyzer_data:
            # Use the analyzer data fields
            result['analysis'] = content['analyzerData']
        else:
            # Use the hydrolysis fields and delete the analyzer ones
            if has_analyzer_data:
                del content['analyzerData']
            result['content'] = content
    if analysis.status == Status.error:
        result['error'] = analysis.error
    if result['status'] != Status.ready:
        self.response.set_status(400)
    # NOTE(review): Content-Type is set a second time here (redundant).
    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(result))
def handle_get(self, owner, repo):
    """Queue analysis for every ready version of the library."""
    self.init_library(owner, repo)
    if self.library is None:
        self.response.set_status(404)
        self.response.write('could not find library: %s' % Library.id(owner, repo))
        return
    ready_versions = Version.query(Version.status == Status.ready, ancestor=self.library.key).fetch()
    for entry in ready_versions:
        self.trigger_analysis(entry.key.id(), entry.sha, transactional=False)
def handle_get(self, owner, repo):
    """Debug view: dump the stored search document for a library."""
    document = search.Index('repo').get(Library.id(owner, repo))
    if document is None:
        self.response.set_status(404)
        return
    for field in document.fields:
        self.response.write('%s: %s<br>' % (field.name, field.value))
    self.response.write('rank: %s<br>' % (document.rank))
def update_collection_dependencies(self, collection_version_key, bower):
    """Register collection->member references and queue member ingestion.

    CONSISTENCY FIX: the original built the member library key from the
    raw dependency owner/repo but lowercased them for the ingestion
    task, so the reference and the ingested entity could use different
    ids. Both now use the lowercased form.
    """
    dependencies = bower.get('dependencies', {})
    for name in dependencies.keys():
        dep = Dependency.from_string(dependencies[name])
        if dep is None:
            continue
        dep_owner = dep.owner.lower()
        dep_repo = dep.repo.lower()
        library_key = ndb.Key(Library, Library.id(dep_owner, dep_repo))
        CollectionReference.ensure(library_key, collection_version_key, semver=dep.version)
        task_url = util.ensure_library_task(dep_owner, dep_repo)
        util.new_task(task_url, target='manage')
def handle_get(self, owner, repo):
    """Refresh metadata and versions for an already-ingested library."""
    self.init_library(owner, repo, create=False)
    if self.library is None:
        logging.warning('Library not found: %s', Library.id(owner, repo))
        return
    # Can't update a library if it's not licensed correctly.
    if self.library.spdx_identifier is None:
        return
    self.update_metadata()
    self.update_versions()
    self.set_ready()
def update_search_index(self, owner, repo, version_key, library, bower):
    """Build and store the weighted search document for a library version.

    Combines GitHub metadata, bower.json, and hydrolysis analysis data
    into one search document ranked by the library's update time.
    """
    metadata = json.loads(library.metadata)
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(
            name='prefix_matches',
            value=' '.join(
                util.generate_prefixes_from_list(
                    util.safe_split_strip(metadata.get('description')) +
                    util.safe_split_strip(bower.get('description')) +
                    util.safe_split_strip(repo)))),
    ]
    # Generate weighting field
    weights = [(repo, 10)]
    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
        analysis = json.loads(analysis.content)
        elements = analysis.get('elementsByTagName', {}).keys()
        if elements != []:
            fields.append(
                search.TextField(name='element', value=' '.join(elements)))
            weights.append((' '.join(elements), 5))
        behaviors = analysis.get('behaviorsByName', {}).keys()
        if behaviors != []:
            fields.append(
                search.TextField(name='behavior', value=' '.join(behaviors)))
            weights.append((' '.join(behaviors), 5))
    # Repeat each value `weight` times so the scorer boosts it.
    weighted = []
    for value, weight in weights:
        for _ in range(0, weight):
            weighted.append(value)
    fields.append(
        search.TextField(name='weighted_fields', value=' '.join(weighted)))
    # Rank documents by recency: seconds since 2016-01-01.
    rank = int(
        (library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    index = search.Index('repo')
    index.put(document)
def handle_get(self, owner, repo):
    """Rebuild search indexes from the library's default version."""
    library_key = ndb.Key(Library, Library.id(owner, repo))
    version = Library.default_version_for_key_async(library_key).get_result()
    if version is None:
        return self.error('no versions for %s' % Library.id(owner, repo))
    bower_key = ndb.Key(Library, Library.id(owner, repo), Version, version, Content, 'bower')
    stored_bower = bower_key.get()
    bower = stored_bower.get_json() if stored_bower is not None else {}
    version_key = bower_key.parent()
    library = version_key.parent().get()
    self.update_search_index(owner, repo, version_key, library, bower)
    if library.kind == 'collection':
        self.update_collection_dependencies(version_key, bower)
    # Guard against a race: retry if the default version moved while we worked.
    current_default = Library.default_version_for_key_async(library_key).get_result()
    if current_default is not None and current_default != version:
        return self.retry('default version changed while updating indexes')
def handle_get(self, owner, repo):
    """Rebuild search indexes from the library's default version."""
    library_key = ndb.Key(Library, Library.id(owner, repo))
    version = Library.default_version_for_key_async(library_key).get_result()
    if version is None:
        return self.error('no versions for %s' % Library.id(owner, repo))
    bower_key = ndb.Key(Library, Library.id(owner, repo), Version, version, Content, 'bower')
    stored_bower = bower_key.get()
    bower = json.loads(stored_bower.content) if stored_bower is not None else {}
    version_key = bower_key.parent()
    library = version_key.parent().get()
    self.update_search_index(owner, repo, version_key, library, bower)
    if library.kind == 'collection':
        self.update_collection_dependencies(version_key, bower)
    # Guard against a race: retry if the default version moved while we worked.
    current_default = Library.default_version_for_key_async(library_key).get_result()
    if current_default is not None and current_default != version:
        return self.retry('default version changed while updating indexes')
def test_update_all(self):
    """update-all enqueues one task per library and one per author."""
    library_key = Library(id='owner/repo').put()
    author_key = Author(id='owner').put()
    response = self.app.get('/manage/update-all', headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)
    expected_urls = [
        util.update_library_task(library_key.id()),
        util.update_author_task(author_key.id()),
    ]
    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(expected_urls, [task.url for task in queued])
def get(self, owner, repo, ver=None):
    """Serve full library metadata; 404 when missing, 400 when not ready."""
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    library_key = ndb.Key(Library, Library.id(owner.lower(), repo.lower()))
    result = yield LibraryMetadata.full_async(library_key, ver)
    if result is None:
        self.response.set_status(404)
        return
    self.response.headers['Content-Type'] = 'application/json'
    if result['status'] != Status.ready:
        self.response.set_status(400)
    self.response.write(json.dumps(result))
def handle_post(self):
    """Store a raw analysis payload delivered via a Pub/Sub push message.

    Rejects payloads over 500 KB by recording an error on the Content
    entity instead of the data.
    """
    message_json = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = message_json['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
        return
    if data == '':
        # Empty payload clears any previously stored analysis.
        content.content = None
    elif len(data) > 500000:
        # Max entity size is only 1MB.
        logging.error('content was too large: %d %s %s', len(data), Library.id(owner, repo), version)
        error = 'content was too large: %d' % len(data)
    else:
        content.content = data
    if error is None:
        content.status = Status.ready
        content.error = None
    else:
        content.status = Status.error
        content.error = error
    content.put()
    # When this is the library's default version, refresh the search indexes.
    if version_key.id() == Library.default_version_for_key_async(version_key.parent()).get_result():
        task_url = util.update_indexes_task(owner, repo)
        util.new_task(task_url, target='manage')
def post(self):
    """GitHub webhook: on PR open/synchronize, post a preview status on
    the PR and queue a preview-ingestion task.

    Responds 202 for uninteresting events, 400 for unregistered repos,
    500 when the GitHub status API call fails.
    """
    if self.request.headers.get('X-Github-Event') != 'pull_request':
        self.response.set_status(202)  # Accepted
        self.response.write('Payload was not for a pull_request, aborting.')
        return
    payload = json.loads(self.request.body)
    if payload['action'] != 'opened' and payload['action'] != 'synchronize':
        self.response.set_status(202)  # Accepted
        self.response.write('Payload was not opened or synchronize, aborting.')
        return
    # Original repo
    origin_owner = payload['repository']['owner']['login']
    origin_repo = payload['repository']['name']
    origin_full_name = payload['repository']['full_name']
    # Repo where the pull request came from.
    pull_owner = payload['pull_request']['head']['repo']['owner']['login']
    pull_repo = payload['pull_request']['head']['repo']['name']
    key = ndb.Key(Library, Library.id(origin_owner, origin_repo))
    library = key.get(read_policy=ndb.EVENTUAL_CONSISTENCY)
    if library is None:
        logging.error('No library object found for %s', origin_full_name)
        self.response.set_status(400)  # Bad request
        self.response.write('It does not seem like this repository was registered')
        return
    sha = payload['pull_request']['head']['sha']
    # Build the preview URL from this request's own scheme/host.
    parsed_url = urlparse(self.request.url)
    params = {
        'state': 'success',
        'target_url': '%s://%s/preview/%s/%s/%s' % (parsed_url.scheme, parsed_url.netloc, pull_owner, pull_repo, sha),
        'description': 'Preview is ready!',  # TODO: Don't lie
        'context': 'webcomponents/preview'
    }
    response = util.github_post('repos', origin_owner, origin_repo, 'statuses/%s' % sha, params, library.github_access_token)
    if response.status_code != 201:
        logging.error('Failed to set status on Github PR. Github returned %s:%s', response.status_code, response.content)
        self.response.set_status(500)
        self.response.write('Failed to set status on PR.')
        return
    pull_request_url = payload['pull_request']['url']
    util.new_task(util.ingest_preview_task(pull_owner, pull_repo), params={'commit': sha, 'url': pull_request_url}, target='manage')
def handle_get(self, scope, package, latest=False):
    """Queue analysis for the default version (latest=True) or every ready version."""
    self.init_library(scope, package)
    if self.library is None:
        self.response.set_status(404)
        self.response.write('could not find library: %s' % Library.id(scope, package))
        return
    if not latest:
        # Re-analyze every version that completed ingestion.
        ready_versions = Version.query(Version.status == Status.ready, ancestor=self.library.key).fetch()
        for entry in ready_versions:
            self.trigger_analysis(entry.key.id(), entry.sha, transactional=False)
        return
    default_id = Library.default_version_for_key_async(self.library.key).get_result()
    if not default_id:
        return
    default_version = Version.get_by_id(default_id, parent=self.library.key)
    if default_version is not None:
        self.trigger_analysis(default_id, default_version.sha, transactional=False)
def get(self, owner, repo, ver, path):
    """Serve stored demo/page content for a library version.

    Responds 404 when the page content does not exist.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)
    # Note: the original checked `version_key is None`, but ndb.Key
    # construction never returns None, so that branch was unreachable
    # and has been removed; an unknown repo/version simply yields no
    # page content below.
    page = Content.get_by_id('page-' + path, parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)
    if page is None:
        self.response.set_status(404)
        self.response.write('Cannot find page %s' % path)
        return
    self.response.write(page.content)
def update_search_index(self, owner, repo, version_key, library, bower):
    """Build and store the weighted search document for a library version.

    Combines GitHub metadata, bower.json, and hydrolysis analysis data
    into one search document ranked by the library's update time.
    """
    metadata = json.loads(library.metadata)
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='prefix_matches', value=' '.join(util.generate_prefixes_from_list(
            util.safe_split_strip(metadata.get('description')) +
            util.safe_split_strip(bower.get('description')) +
            util.safe_split_strip(repo)))),
    ]
    # Generate weighting field
    weights = [(repo, 10)]
    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
        analysis = json.loads(analysis.content)
        elements = analysis.get('elementsByTagName', {}).keys()
        if elements != []:
            fields.append(search.TextField(name='element', value=' '.join(elements)))
            weights.append((' '.join(elements), 5))
        behaviors = analysis.get('behaviorsByName', {}).keys()
        if behaviors != []:
            fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
            weights.append((' '.join(behaviors), 5))
    # Repeat each value `weight` times so the scorer boosts it.
    weighted = []
    for value, weight in weights:
        for _ in range(0, weight):
            weighted.append(value)
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))
    # Rank documents by recency: seconds since 2016-01-01.
    rank = int((library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    index = search.Index('repo')
    index.put(document)
def update_search_index(self, owner, repo, version_key, library, bower):
    """Build and store the search document for a library version.

    Earlier variant: no weighting field and no rank on the document.
    """
    metadata = json.loads(library.metadata)
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(
            name='prefix_matches',
            value=' '.join(
                util.generate_prefixes_from_list(
                    [repo] + util.safesplit(metadata.get('description')) +
                    util.safesplit(bower.get('description')) +
                    repo.replace("_", " ").replace("-", " ").split()))),
    ]
    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
        analysis = json.loads(analysis.content)
        elements = analysis.get('elementsByTagName', {}).keys()
        if elements != []:
            fields.append(
                search.TextField(name='element', value=' '.join(elements)))
        behaviors = analysis.get('behaviorsByName', {}).keys()
        if behaviors != []:
            fields.append(
                search.TextField(name='behavior', value=' '.join(behaviors)))
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields)
    index = search.Index('repo')
    index.put(document)
def handle_post(self):
    """Store an analysis result from Pub/Sub; drops oversized or malformed
    push messages."""
    # Ignore payloads larger than 5 MB.
    if len(self.request.body) > 1048487 * 5:
        return
    message_json = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = message_json['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    if len(attributes) == 0:
        # Malformed push message; log it for diagnosis and drop it.
        logging.error(message)
        return
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
        return
    if data == '':
        # Empty payload clears any previously stored analysis.
        content.set_json(None)
    else:
        content.set_json(json.loads(data))
    if error is None:
        content.status = Status.ready
        content.error = None
    else:
        content.status = Status.error
        content.error = error
    content.put()
    # When this is the library's default version, refresh the search indexes.
    if version_key.id() == Library.default_version_for_key_async(version_key.parent()).get_result():
        task_url = util.update_indexes_task(owner, repo)
        util.new_task(task_url, target='manage')
def handle_get(self, owner, repo):
    """Delete the named library, logging progress to a plain-text response."""
    self.response.headers['Content-Type'] = 'text/plain'
    library_id = Library.id(owner, repo).lower()
    library_key = ndb.Key(Library, library_id)
    delete_library(library_key, response_for_logging=self.response)
def handle_get(self, owner, repo):
    """Ensure the library is fully ingested, scheduling ingestion if not."""
    library = Library.get_by_id(Library.id(owner, repo))
    # Already fully ingested: nothing to do.
    if library is not None and not library.shallow_ingestion:
        return
    util.new_task(util.ingest_library_task(owner, repo), target='manage')
def update_search_index(self, owner, repo, version_key, library, bower):
    """Build and store the weighted full-text search document for a version.

    Indexes GitHub, bower and NPM-registry metadata, plus element/behavior
    names from whichever analysis payload is present (analyzer data when
    available, hydrolysis data otherwise). A 'weighted_fields' field repeats
    high-signal terms to boost their relevance, and the document rank is
    derived from the library's update time.
    """
    metadata = json.loads(library.metadata)
    # registry_metadata is only present for NPM-registered libraries.
    registry_metadata = json.loads(library.registry_metadata) if library.registry_metadata else None
    npm_description = registry_metadata.get('description', '') if registry_metadata else ''
    npm_keywords = registry_metadata.get('keywords', []) if registry_metadata else []
    fields = [
        search.AtomField(name='owner', value=owner),
        search.AtomField(name='github_owner', value=library.github_owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='npm_description', value=npm_description),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='npm_keywords', value=' '.join(npm_keywords)),
        search.TextField(name='prefix_matches', value=' '.join(util.generate_prefixes_from_list(
            util.safe_split_strip(metadata.get('description')) +
            util.safe_split_strip(bower.get('description')) +
            util.safe_split_strip(repo)))),
    ]
    # Generate weighting field: (term, repeat-count) pairs; the repo name
    # carries the highest weight.
    weights = [(repo, 10)]
    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
        data = analysis.get_json()
        if data.get('analyzerData', None) is not None:
            # Use analyzer data for search index
            element_objects = data.get('analyzerData', {}).get('elements', [])
            # An element is identified by its tag name, falling back to its
            # class name.
            elements = [element.get('tagname', '') or element.get('classname', '') for element in element_objects]
            if elements != []:
                fields.append(search.TextField(name='element', value=' '.join(elements)))
                weights.append((' '.join(elements), 5))
            behavior_objects = data.get('analyzerData', {}).get('metadata', {}).get('polymer', {}).get('behaviors', [])
            behaviors = [behavior.get('name', '') for behavior in behavior_objects]
            if behaviors != []:
                fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
                weights.append((' '.join(behaviors), 5))
        else:
            # Use hydrolysis data for search index
            elements = data.get('elementsByTagName', {}).keys()
            if elements != []:
                fields.append(search.TextField(name='element', value=' '.join(elements)))
                weights.append((' '.join(elements), 5))
            behaviors = data.get('behaviorsByName', {}).keys()
            if behaviors != []:
                fields.append(search.TextField(name='behavior', value=' '.join(behaviors)))
                weights.append((' '.join(behaviors), 5))
    # Expand the (term, weight) pairs: each term repeated `weight` times.
    weighted = []
    for value, weight in weights:
        for _ in range(0, weight):
            weighted.append(value)
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))
    # Rank by seconds since 2016-01-01 so more recently updated libraries
    # sort first.
    rank = int((library.updated - datetime.datetime(2016, 1, 1)).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    index = search.Index('repo')
    index.put(document)
def update_metadata(self):
    """Refresh GitHub metadata, contributors and participation stats.

    For NPM packages (scope starts with '@') the owner/repo are first
    resolved via the registry; otherwise scope/package map directly to
    owner/repo. Handles repo renames (delete + re-ingest under the new id)
    and deletions. Suppresses a pre-existing Bower entry when an NPM package
    points at the same repo.

    Raises:
      RequestAborted: when the repo was renamed or no longer exists.
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
        self.update_registry_info()
    else:
        self.owner = self.scope
        self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)
        # Canonicalize owner/repo from what GitHub reports.
        self.owner = metadata.get('owner', {}).get('login', '').lower()
        self.repo = metadata.get('name', '').lower()
        # Deleting is only necessary if Library entity is a GitHub repo
        if (not is_npm_package) and self.repo != '' and self.owner != '' and (self.repo != self.package or self.owner != self.scope):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(self.owner, self.repo))
        # If adding a NPM package that a Bower repo already points to, remove the bower one.
        bower_library_id = Library.id(self.owner, self.repo)
        # NOTE(review): Library.id() presumably always returns a string, which
        # would make the None check below vacuous — should this be a
        # Library.get_by_id() existence check? Confirm Library.id's contract.
        if is_npm_package and bower_library_id is not None:
            logging.info('removing bower repo %s', Library.id(self.owner, self.repo))
            task_url = util.suppress_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')
        self.library.github_owner = self.owner
        self.library.github_repo = self.repo
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # Fixed: the path segment previously carried a stray trailing space
    # ('stats/participation ') which builds a malformed GitHub API URL.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors and participation.

    Handles repo renames (deletes this entity and schedules ingestion under
    the new id) and deleted repos. All GitHub fetches are conditional on the
    stored ETags; 304 responses leave the stored data untouched.

    Raises:
      RequestAborted: when the repo was renamed or no longer exists.
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)
        repo = metadata.get('name', '').lower()
        owner = metadata.get('owner', {}).get('login', '').lower()
        if repo != '' and owner != '' and (repo != self.repo or owner != self.owner):
            # GitHub reports a different canonical id: the repo was renamed.
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(owner, repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(owner, repo))
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # Fixed: the path segment previously carried a stray trailing space
    # ('stats/participation ') which builds a malformed GitHub API URL.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def update_metadata(self):
    """Refresh GitHub metadata, contributors and participation stats.

    For NPM packages (scope starts with '@') the owner/repo are first
    resolved via the registry; otherwise scope/package map directly to
    owner/repo. Handles repo renames (delete + re-ingest under the new id)
    and deletions. Schedules migration of a pre-existing Bower entry when an
    NPM package points at the same repo.

    Raises:
      RequestAborted: when the repo was renamed or no longer exists.
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
        self.update_registry_info()
    else:
        self.owner = self.scope
        self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)
        # Canonicalize owner/repo from what GitHub reports.
        self.owner = metadata.get('owner', {}).get('login', '').lower()
        self.repo = metadata.get('name', '').lower()
        # Deleting is only necessary if Library entity is a GitHub repo
        if (not is_npm_package) and self.repo != '' and self.owner != '' and (self.repo != self.package or self.owner != self.scope):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(self.owner, self.repo))
        # If adding a NPM package that a Bower repo already points to, remove the bower one.
        bower_library_id = Library.id(self.owner, self.repo)
        # NOTE(review): Library.id() presumably always returns a string, which
        # would make the None check below vacuous — should this be a
        # Library.get_by_id() existence check? Confirm Library.id's contract.
        if is_npm_package and bower_library_id is not None:
            task_url = util.migrate_library_task(self.owner, self.repo, self.scope, self.package)
            util.new_task(task_url, target='manage')
        self.library.github_owner = self.owner
        self.library.github_repo = self.repo
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # Fixed: the path segment previously carried a stray trailing space
    # ('stats/participation ') which builds a malformed GitHub API URL.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def handle_get(self, scope, package):
    """Delete the library for scope/package, logging to a plain-text response."""
    self.response.headers['Content-Type'] = 'text/plain'
    library_id = Library.id(scope, package).lower()
    library_key = ndb.Key(Library, library_id)
    delete_library(library_key, response_for_logging=self.response)