def analyses(app):
    """Store a small, fixed set of analyses (plus worker results) for tests.

    Three ``Analysis`` rows are created:

    * ``model1`` - npm/arrify/1.0.1, with both start and finish timestamps,
    * ``model2`` - pypi/flexmock/0.10.1, started only, six worker results,
    * ``model3`` - pypi/flexmock/0.10.1 again, with an ``audit`` payload and
      a single ``digests`` worker result.

    :param app: object exposing the database session as ``app.rdb.session``
    :return: tuple ``(model1, model2, model3)``
    """
    def _store_worker_results(analysis, results):
        # One WorkerResult row per "worker name -> task result" entry; the
        # analysis must already be committed so that its ``id`` is populated.
        for worker_name, task_result in results.items():
            app.rdb.session.add(WorkerResult(analysis_id=analysis.id,
                                             worker=worker_name,
                                             task_result=task_result))

    # npm/arrify/1.0.1 - a finished analysis
    npm_ecosystem = Ecosystem(name='npm', backend=EcosystemBackend.npm)
    arrify = Package(ecosystem=npm_ecosystem, name='arrify')
    arrify_version = Version(package=arrify, identifier='1.0.1')
    model1 = Analysis(version=arrify_version, started_at=now, finished_at=later)
    app.rdb.session.add(model1)

    # pypi/flexmock/0.10.1 - first analysis, not finished
    pypi_ecosystem = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    flexmock = Package(ecosystem=pypi_ecosystem, name='flexmock')
    flexmock_version = Version(package=flexmock, identifier='0.10.1')
    model2 = Analysis(version=flexmock_version, started_at=later, access_count=1)
    app.rdb.session.add(model2)
    app.rdb.session.commit()

    _store_worker_results(model2, {
        'a': 'b',
        'c': 'd',
        'e': 'f',
        'g': 'h',
        'i': 'j',
        'digests': {
            'details': [
                {'artifact': True,
                 'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'},
            ],
        },
    })

    # pypi/flexmock/0.10.1 - second analysis of the same version, with audit
    model3 = Analysis(
        version=flexmock_version,
        started_at=later,
        access_count=1,
        audit={
            'audit': {'audit': 'audit', 'e': 'f', 'g': 'h'},
            'a': 'b',
            'c': 'd',
        },
    )
    app.rdb.session.add(model3)
    app.rdb.session.commit()

    _store_worker_results(model3, {
        'digests': {
            'details': [
                {'artifact': True,
                 'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'},
            ],
        },
    })
    app.rdb.session.commit()

    return (model1, model2, model3)
def fill_analyses(app):
    """Prepare static data used by unit tests.

    Creates ecosystems, packages, versions, analyses and worker results,
    adds all of them to ``app.rdb.session`` and commits.

    :param app: object exposing the database session as ``app.rdb.session``
    :return: tuple ``(ecosystems, packages, versions, analyses,
             worker_results, package_gh_usage)``; every item is a list
    """
    # TODO can not find any usage of this function
    ecosystems = [
        Ecosystem(name='pypi', backend=EcosystemBackend.pypi,
                  url='https://pypi.python.org/',
                  fetch_url='https://pypi.python.org/pypi'),
        Ecosystem(name='npm', backend=EcosystemBackend.npm,
                  url='https://www.npmjs.com/',
                  fetch_url='https://registry.npmjs.org/'),
        Ecosystem(name='go', backend=EcosystemBackend.scm),
    ]

    packages = [
        Package(name='flexmock', ecosystem=ecosystems[0]),
        Package(name='requests', ecosystem=ecosystems[0]),
        Package(name='sequence', ecosystem=ecosystems[1]),
        Package(name='arrify', ecosystem=ecosystems[1]),
        Package(name='serve-static', ecosystem=ecosystems[1]),
    ]

    versions = [
        Version(identifier='0.10.1', package=packages[0]),
        Version(identifier='0.9.1', package=packages[0]),
        Version(identifier='2.0.0', package=packages[1]),
        Version(identifier='2.2.1', package=packages[2]),
        Version(identifier='1.0.1', package=packages[3]),
        Version(identifier='1.7.1', package=packages[4]),
    ]

    analyses = [
        Analysis(version=versions[0], started_at=now),  # pypi/flexmock/0.10.1
        Analysis(version=versions[0], started_at=later,
                 access_count=1),  # pypi/flexmock/0.10.1
        Analysis(version=versions[1], started_at=even_later),  # pypi/flexmock/0.9.1
        Analysis(version=versions[2], started_at=now),  # pypi/requests/2.0.0
        Analysis(version=versions[3], started_at=later),  # npm/sequence/2.2.1
        Analysis(version=versions[4], started_at=now,
                 finished_at=later),  # npm/arrify/1.0.1
        Analysis(version=versions[5], started_at=now, finished_at=later,
                 release='npm:serve-static:1.7.1'),  # npm/serve-static/1.7.1
    ]

    # worker results that correspond to analyses above
    worker_results = [
        WorkerResult(worker='digests', analysis=analyses[1],
                     task_result={'details': [
                         {'artifact': True,
                          'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]}),
        WorkerResult(worker='static_analysis', task_result={'details': []},
                     analysis=analyses[1]),
        WorkerResult(worker='source_licenses',
                     task_result={'schema': {'name': 'source_licenses',
                                             'version': '1-0-0'}},
                     analysis=analyses[1]),
    ]

    # TODO: just a placeholder, it won't work in real tests!!!
    # The placeholder is an empty list rather than None: it is concatenated
    # with the lists above, and ``list + None`` raises TypeError.
    package_gh_usage = []

    for a in ecosystems + packages + versions + analyses + worker_results + package_gh_usage:
        app.rdb.session.add(a)
    app.rdb.session.commit()

    return (ecosystems, packages, versions, analyses, worker_results, package_gh_usage)
def setup_method(self, method):
    """Create the per-test database fixture and the storage under test.

    Stores one ecosystem/package/version (``foo``/``bar``/``1.1.1``) with two
    analyses of that version, the second finishing roughly ten seconds after
    the first, and instantiates ``BayesianPostgres``.

    :param method: the test method about to run (unused here)
    """
    # Called only for its side effect - NOTE(review): presumably prepares the
    # database before a session is opened; confirm against the helper.
    rdb()
    self.s = create_db_scoped_session()

    self.en, self.pn, self.vi = 'foo', 'bar', '1.1.1'

    self.e = Ecosystem(name=self.en, backend=EcosystemBackend.maven)
    self.p = Package(ecosystem=self.e, name=self.pn)
    self.v = Version(package=self.p, identifier=self.vi)

    # Two analyses of the same version with different finish times.
    self.a = Analysis(version=self.v, finished_at=datetime.datetime.now())
    ten_seconds = datetime.timedelta(seconds=10)
    self.a2 = Analysis(version=self.v,
                       finished_at=datetime.datetime.now() + ten_seconds)

    for analysis in (self.a, self.a2):
        self.s.add(analysis)
    self.s.commit()

    self.bp = BayesianPostgres(connection_string=configuration.POSTGRES_CONNECTION)
def test_f8a_fetcher(self, rdb, npm):
    """Test F8aReleasesFetcher.

    Stores package ``f8a`` with a known set of versions (each with a finished
    analysis), then checks both the raw list of fetched releases and the
    results of the ecosystem solver built on top of the fetcher.
    """
    # create initial dataset
    f8a_package = Package(ecosystem=npm, name='f8a')
    rdb.add(f8a_package)
    rdb.commit()

    expected_versions = {
        '0.5.0',
        '0.5.1',
        '0.6.0',
        '0.6.4',
        '0.7.0',
        '0.8.0',
        '0.9.0',
        '1.0.0',
        '1.0.5',
    }
    for identifier in expected_versions:
        stored_version = Version(package=f8a_package, identifier=identifier)
        rdb.add(stored_version)
        rdb.commit()

        finished_analysis = Analysis(version=stored_version)
        # Fetcher only selects finished analyses
        finished_analysis.finished_at = datetime.datetime.utcnow()
        rdb.add(finished_analysis)
        rdb.commit()

    fetcher = F8aReleasesFetcher(npm, rdb)
    # Only the second item of the fetch result (the releases) is inspected.
    releases = fetcher.fetch_releases('f8a')[1]

    # make sure we fetched the same stuff we inserted
    assert set(releases) == expected_versions

    # pop() takes the last element, which is expected to be the latest release
    assert releases.pop() == '1.0.5'

    # try different dependency specs
    solver = get_ecosystem_solver(npm, with_fetcher=fetcher)
    spec_to_resolved = (
        ('f8a ^0.5.0', '0.5.1'),
        ('f8a 0.x.x', '0.9.0'),
        ('f8a >1.0.0', '1.0.5'),
        ('f8a ~>0.6.0', '0.6.4'),
    )
    for spec, resolved in spec_to_resolved:
        assert solver.solve([spec])['f8a'] == resolved

    # check that with `all_versions` we return all the relevant ones
    all_matching = set(solver.solve(['f8a >=0.6.0'], all_versions=True)['f8a'])
    assert all_matching == (expected_versions - {'0.5.0', '0.5.1'})
def execute(self, arguments):
    """Task code.

    Registers the package/version in the database, makes sure the source
    artifacts are present in the object cache and creates a new ``Analysis``
    record for the run.

    :param arguments: dictionary with task arguments; ``name``, ``version``
                      and ``ecosystem`` are checked via ``_strict_assert``,
                      ``force`` is optional. The dictionary is modified in
                      place.
    :return: the same ``arguments`` dictionary. When an analysis of the
             version already exists and ``force`` is not set, the ``name``,
             ``version`` and ``ecosystem`` keys are removed and nothing else
             is done; otherwise ``document_id`` and ``ecosystem_backend``
             are added.
    :raises FatalTaskError: if the ecosystem is not found in the database
    """
    self.log.debug("Input Arguments: {}".format(arguments))
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))
    self._strict_assert(arguments.get('ecosystem'))

    # make sure we store package name based on ecosystem package naming case sensitivity
    arguments['name'] = normalize_package_name(arguments['ecosystem'], arguments['name'])

    db = self.storage.session
    try:
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
    except NoResultFound:
        raise FatalTaskError('Unknown ecosystem: %r' % arguments['ecosystem'])

    p = Package.get_or_create(db, ecosystem_id=ecosystem.id, name=arguments['name'])
    v = Version.get_or_create(db, package_id=p.id, identifier=arguments['version'])

    if not arguments.get('force'):
        # TODO: this is OK for now, but if we will scale and there will be
        # 2+ workers running this task they can potentially schedule two
        # flows of a same type at the same time
        if db.query(Analysis).filter(Analysis.version_id == v.id).count() > 0:
            # we need to propagate flags that were passed to flow, but not
            # E/P/V - this way we are sure that for example graph import is
            # scheduled (arguments['force_graph_sync'] == True)
            arguments.pop('name')
            arguments.pop('version')
            arguments.pop('ecosystem')
            self.log.debug("Arguments returned by initAnalysisFlow without force: {}"
                           .format(arguments))
            return arguments

    cache_path = mkdtemp(dir=self.configuration.WORKER_DATA_DIR)
    try:
        # Everything after mkdtemp() runs inside the try block so that the
        # temporary directory is removed even if the cache lookup fails.
        epv_cache = ObjectCache.get_from_dict(arguments)

        if not epv_cache.has_source_tarball():
            _, source_tarball_path = IndianaJones.fetch_artifact(
                ecosystem=ecosystem,
                artifact=arguments['name'],
                version=arguments['version'],
                target_dir=cache_path
            )
            epv_cache.put_source_tarball(source_tarball_path)

        if ecosystem.is_backed_by(EcosystemBackend.maven):
            if not epv_cache.has_source_jar():
                # Best effort: a failure to fetch the source jar is only logged.
                try:
                    source_jar_path = self._download_source_jar(cache_path, ecosystem,
                                                                arguments)
                    epv_cache.put_source_jar(source_jar_path)
                except Exception as e:
                    self.log.info(
                        'Failed to fetch source jar for maven artifact "{n}/{v}": {err}'.
                        format(n=arguments.get('name'),
                               v=arguments.get('version'),
                               err=str(e))
                    )

            if not epv_cache.has_pom_xml():
                pom_xml_path = self._download_pom_xml(cache_path, ecosystem, arguments)
                epv_cache.put_pom_xml(pom_xml_path)
    finally:
        # always clean up cache
        shutil.rmtree(cache_path)

    a = Analysis(version=v, access_count=1, started_at=datetime.datetime.utcnow())
    db.add(a)
    db.commit()

    arguments['document_id'] = a.id

    # export ecosystem backend so we can use it to easily control flow later
    arguments['ecosystem_backend'] = ecosystem.backend.name

    self.log.debug("Arguments returned by InitAnalysisFlow are: {}".format(arguments))
    return arguments
def execute(self, arguments):
    """Task code.

    Validates the input, rejects versions matching ``pattern_ignore`` and
    packages reported as non-public, registers the package/version in the
    database, makes sure the source artifacts are present in the object cache
    and creates a new ``Analysis`` record for the run.

    :param arguments: dictionary with task arguments; ``ecosystem``, ``name``
                      and ``version`` must be strings, ``force`` is optional.
                      The dictionary is modified in place.
    :return: the same ``arguments`` dictionary. When an analysis of the
             version already exists and ``force`` is not set, only
             ``analysis_already_exists`` is added; otherwise ``document_id``
             and ``ecosystem_backend`` are added.
    :raises FatalTaskError: if the ecosystem is not found in the database
    :raises NotABugFatalTaskError: if the version matches ``pattern_ignore``
                                   or the package is not public
    """
    self.log.debug("Input Arguments: {}".format(arguments))
    self._strict_assert(isinstance(arguments.get('ecosystem'), str))
    self._strict_assert(isinstance(arguments.get('name'), str))
    self._strict_assert(isinstance(arguments.get('version'), str))

    db = self.storage.session
    try:
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
    except NoResultFound:
        raise FatalTaskError('Unknown ecosystem: %r' % arguments['ecosystem'])

    # make sure we store package name in its normalized form
    arguments['name'] = normalize_package_name(ecosystem.backend.name, arguments['name'])

    if pattern_ignore.findall(arguments['version']):
        version_alert = "Incorrect version alert {} {}".format(
            arguments['name'], arguments['version'])
        self.log.info(version_alert)
        raise NotABugFatalTaskError(version_alert)

    # Dont try ingestion for private packages
    if not is_pkg_public(arguments['ecosystem'], arguments['name']):
        self.log.info("Private package ingestion ignored {} {}".format(
            arguments['ecosystem'], arguments['name']))
        raise NotABugFatalTaskError("Private package alert {} {}".format(
            arguments['ecosystem'], arguments['name']))
    self.log.info("Ingestion flow for {} {}".format(
        arguments['ecosystem'], arguments['name']))

    p = Package.get_or_create(db, ecosystem_id=ecosystem.id, name=arguments['name'])
    v = Version.get_or_create(db, package_id=p.id, identifier=arguments['version'])

    if not arguments.get('force'):
        if db.query(Analysis).filter(Analysis.version_id == v.id).count() > 0:
            arguments['analysis_already_exists'] = True
            self.log.debug(
                "Arguments returned by initAnalysisFlow without force: {}".
                format(arguments))
            return arguments

    # Read the configuration before creating the temporary directory, so a
    # failure here cannot leave the directory behind.
    npm_dir = self.configuration.NPM_DATA_DIR
    cache_path = mkdtemp(dir=self.configuration.WORKER_DATA_DIR)
    try:
        # Everything after mkdtemp() runs inside the try block so that the
        # temporary directory is removed even if the cache lookup fails.
        epv_cache = ObjectCache.get_from_dict(arguments)

        if not epv_cache.has_source_tarball():
            _, source_tarball_path = IndianaJones.fetch_artifact(
                ecosystem=ecosystem,
                artifact=arguments['name'],
                version=arguments['version'],
                target_dir=cache_path)
            epv_cache.put_source_tarball(source_tarball_path)

        if ecosystem.is_backed_by(EcosystemBackend.maven):
            if not epv_cache.has_source_jar():
                # Best effort: a failure to fetch the source jar is only logged.
                try:
                    source_jar_path = self._download_source_jar(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_source_jar(source_jar_path)
                except Exception as e:
                    self.log.info(
                        'Failed to fetch source jar for maven artifact "{n}/{v}": {err}'
                        .format(n=arguments.get('name'),
                                v=arguments.get('version'),
                                err=str(e)))

            if not epv_cache.has_pom_xml():
                pom_xml_path = self._download_pom_xml(
                    cache_path, ecosystem, arguments)
                epv_cache.put_pom_xml(pom_xml_path)
    finally:
        # always clean up cache
        shutil.rmtree(cache_path)
        if arguments['ecosystem'] == "npm":
            # ignore_errors=True: the npm data dir may not exist
            shutil.rmtree(npm_dir, True)

    a = Analysis(version=v, access_count=1, started_at=datetime.datetime.utcnow())
    db.add(a)
    db.commit()

    arguments['document_id'] = a.id

    # export ecosystem backend so we can use it to easily control flow later
    arguments['ecosystem_backend'] = ecosystem.backend.name

    self.log.debug(
        "Arguments returned by InitAnalysisFlow are: {}".format(arguments))
    return arguments