Пример #1
0
def analyses(app):
    """Prepare the known set of data used by tests."""
    e1 = Ecosystem(name='npm', backend=EcosystemBackend.npm)
    p1 = Package(ecosystem=e1, name='arrify')
    v1 = Version(package=p1, identifier='1.0.1')
    model1 = Analysis(version=v1, started_at=now, finished_at=later)
    app.rdb.session.add(model1)

    e2 = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    p2 = Package(ecosystem=e2, name='flexmock')
    v2 = Version(package=p2, identifier='0.10.1')
    model2 = Analysis(version=v2, started_at=later, access_count=1)
    app.rdb.session.add(model2)
    app.rdb.session.commit()

    worker_results2 = {'a': 'b', 'c': 'd', 'e': 'f', 'g': 'h', 'i': 'j',
                       'digests': {'details':
                                   [{'artifact': True,
                                     'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]}}
    for w, tr in worker_results2.items():
        app.rdb.session.add(WorkerResult(analysis_id=model2.id, worker=w, task_result=tr))

    model3 = Analysis(version=v2, started_at=later, access_count=1,
                      audit={'audit': {'audit': 'audit', 'e': 'f', 'g': 'h'}, 'a': 'b', 'c': 'd'})
    app.rdb.session.add(model3)
    app.rdb.session.commit()
    worker_results3 = {'digests': {'details':
                                   [{'artifact': True,
                                     'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]}}
    for w, tr in worker_results3.items():
        app.rdb.session.add(WorkerResult(analysis_id=model3.id, worker=w, task_result=tr))
    app.rdb.session.commit()
    return (model1, model2, model3)
def fill_analyses(app):
    """Prepare static data used by unit tests."""
    # TODO can not find any usage of this function
    ecosystems = [
        Ecosystem(name='pypi', backend=EcosystemBackend.pypi, url='https://pypi.python.org/',
                  fetch_url='https://pypi.python.org/pypi'),
        Ecosystem(name='npm', backend=EcosystemBackend.npm, url='https://www.npmjs.com/',
                  fetch_url='https://registry.npmjs.org/'),
        Ecosystem(name='go', backend=EcosystemBackend.scm),
    ]

    packages = [
        Package(name='flexmock', ecosystem=ecosystems[0]),
        Package(name='requests', ecosystem=ecosystems[0]),
        Package(name='sequence', ecosystem=ecosystems[1]),
        Package(name='arrify', ecosystem=ecosystems[1]),
        Package(name='serve-static', ecosystem=ecosystems[1]),
    ]

    versions = [
        Version(identifier='0.10.1', package=packages[0]),
        Version(identifier='0.9.1', package=packages[0]),
        Version(identifier='2.0.0', package=packages[1]),
        Version(identifier='2.2.1', package=packages[2]),
        Version(identifier='1.0.1', package=packages[3]),
        Version(identifier='1.7.1', package=packages[4]),
    ]

    analyses = [
        Analysis(version=versions[0], started_at=now),                     # pypi/flexmock/0.10.1
        Analysis(version=versions[0], started_at=later, access_count=1),   # pypi/flexmock/0.10.1
        Analysis(version=versions[1], started_at=even_later),              # pypi/flexmock/0.9.1
        Analysis(version=versions[2], started_at=now),                     # pypi/requests/2.0.0
        Analysis(version=versions[3], started_at=later),                   # npm/sequence/2.2.1
        Analysis(version=versions[4], started_at=now, finished_at=later),  # npm/arrify/1.0.1
        Analysis(version=versions[5], started_at=now, finished_at=later,
                 release='npm:serve-static:1.7.1'),                      # npm/serve-static/1.7.1
    ]
    # worker results that correspond to analyses above
    worker_results = [
        WorkerResult(worker='digests', analysis=analyses[1],
                     task_result={'details': [{'artifact': True,
                                               'sha1':
                                               '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]}),
        WorkerResult(worker='static_analysis', task_result={'details': []}, analysis=analyses[1]),
        WorkerResult(worker='source_licenses',
                     task_result={'schema': {'name': 'source_licenses', 'version': '1-0-0'}},
                     analysis=analyses[1])
    ]

    # TODO: just a placeholder, it won't work in real tests!!!
    package_gh_usage = None

    for a in ecosystems + packages + versions + analyses + worker_results + package_gh_usage:
        app.rdb.session.add(a)
        app.rdb.session.commit()

    return (ecosystems, packages, versions, analyses, worker_results, package_gh_usage)
    def setup_method(self, method):
        rdb()
        self.s = create_db_scoped_session()
        self.en = 'foo'
        self.pn = 'bar'
        self.vi = '1.1.1'
        self.e = Ecosystem(name=self.en, backend=EcosystemBackend.maven)
        self.p = Package(ecosystem=self.e, name=self.pn)
        self.v = Version(package=self.p, identifier=self.vi)
        self.a = Analysis(version=self.v, finished_at=datetime.datetime.now())
        self.a2 = Analysis(version=self.v,
                           finished_at=datetime.datetime.now() +
                           datetime.timedelta(seconds=10))
        self.s.add(self.a)
        self.s.add(self.a2)
        self.s.commit()

        self.bp = BayesianPostgres(
            connection_string=configuration.POSTGRES_CONNECTION)
    def test_f8a_fetcher(self, rdb, npm):
        """Test F8aReleasesFetcher."""
        # create initial dataset
        package = Package(ecosystem=npm, name='f8a')
        rdb.add(package)
        rdb.commit()
        versions = {
            '0.5.0', '0.5.1', '0.6.0', '0.6.4', '0.7.0', '0.8.0', '0.9.0',
            '1.0.0', '1.0.5'
        }
        for v in versions:
            version = Version(package=package, identifier=v)
            rdb.add(version)
            rdb.commit()
            analysis = Analysis(version=version)
            # Fetcher only selects finished analyses
            analysis.finished_at = datetime.datetime.utcnow()
            rdb.add(analysis)
            rdb.commit()

        f = F8aReleasesFetcher(npm, rdb)

        r = f.fetch_releases('f8a')[1]

        # make sure we fetched the same stuff we inserted
        assert set(r) == versions

        # first should be the latest
        assert r.pop() == '1.0.5'

        # try different dependency specs
        s = get_ecosystem_solver(npm, with_fetcher=f)
        assert s.solve(['f8a ^0.5.0'])['f8a'] == '0.5.1'
        assert s.solve(['f8a 0.x.x'])['f8a'] == '0.9.0'
        assert s.solve(['f8a >1.0.0'])['f8a'] == '1.0.5'
        assert s.solve(['f8a ~>0.6.0'])['f8a'] == '0.6.4'

        # check that with `all_versions` we return all the relevant ones
        assert set(s.solve(['f8a >=0.6.0'], all_versions=True)['f8a']) == \
            (versions - {'0.5.0', '0.5.1'})
Пример #5
0
    def execute(self, arguments):
        self._strict_assert(arguments.get('url'))
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('repo_name'))

        db = self.storage.session

        a = Analysis(started_at=datetime.datetime.now())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id
        return arguments
Пример #6
0
    def execute(self, arguments):
        """Task code.

        :param arguments: dictionary with task arguments
        :return: {}, results
        """
        self._strict_assert(arguments.get('url'))
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('repo_name'))

        db = self.storage.session

        a = Analysis(started_at=datetime.datetime.utcnow())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id
        return arguments
Пример #7
0
    def execute(self, arguments):
        """Task code.

        :param arguments: dictionary with task arguments
        :return: {}, results
        """
        self.log.debug("Input Arguments: {}".format(arguments))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))
        self._strict_assert(arguments.get('ecosystem'))

        # make sure we store package name based on ecosystem package naming case sensitivity
        arguments['name'] = normalize_package_name(arguments['ecosystem'], arguments['name'])

        db = self.storage.session
        try:
            ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
        except NoResultFound:
            raise FatalTaskError('Unknown ecosystem: %r' % arguments['ecosystem'])

        p = Package.get_or_create(db, ecosystem_id=ecosystem.id, name=arguments['name'])
        v = Version.get_or_create(db, package_id=p.id, identifier=arguments['version'])

        if not arguments.get('force'):
            # TODO: this is OK for now, but if we will scale and there will be
            # 2+ workers running this task they can potentially schedule two
            # flows of a same type at the same time
            if db.query(Analysis).filter(Analysis.version_id == v.id).count() > 0:
                # we need to propagate flags that were passed to flow, but not
                # E/P/V - this way we are sure that for example graph import is
                # scheduled (arguments['force_graph_sync'] == True)
                arguments.pop('name')
                arguments.pop('version')
                arguments.pop('ecosystem')
                self.log.debug("Arguments returned by initAnalysisFlow without force: {}"
                               .format(arguments))
                return arguments

        cache_path = mkdtemp(dir=self.configuration.WORKER_DATA_DIR)
        epv_cache = ObjectCache.get_from_dict(arguments)

        try:
            if not epv_cache.\
                    has_source_tarball():
                _, source_tarball_path = IndianaJones.fetch_artifact(
                    ecosystem=ecosystem,
                    artifact=arguments['name'],
                    version=arguments['version'],
                    target_dir=cache_path
                )
                epv_cache.put_source_tarball(source_tarball_path)

            if ecosystem.is_backed_by(EcosystemBackend.maven):
                if not epv_cache.has_source_jar():
                    try:
                        source_jar_path = self._download_source_jar(cache_path, ecosystem,
                                                                    arguments)
                        epv_cache.put_source_jar(source_jar_path)
                    except Exception as e:
                        self.log.info(
                            'Failed to fetch source jar for maven artifact "{n}/{v}": {err}'.
                            format(n=arguments.get('name'),
                                   v=arguments.get('version'),
                                   err=str(e))
                        )

                if not epv_cache.has_pom_xml():
                    pom_xml_path = self._download_pom_xml(cache_path, ecosystem, arguments)
                    epv_cache.put_pom_xml(pom_xml_path)
        finally:
            # always clean up cache
            shutil.rmtree(cache_path)

        a = Analysis(version=v, access_count=1, started_at=datetime.datetime.utcnow())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id

        # export ecosystem backend so we can use it to easily control flow later
        arguments['ecosystem_backend'] = ecosystem.backend.name

        self.log.debug("Arguments returned by InitAnalysisFlow are: {}".format(arguments))
        return arguments
    def execute(self, arguments):
        """Task code.

        :param arguments: dictionary with task arguments
        :return: {}, results
        """
        self.log.debug("Input Arguments: {}".format(arguments))
        self._strict_assert(isinstance(arguments.get('ecosystem'), str))
        self._strict_assert(isinstance(arguments.get('name'), str))
        self._strict_assert(isinstance(arguments.get('version'), str))

        db = self.storage.session
        try:
            ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
        except NoResultFound:
            raise FatalTaskError('Unknown ecosystem: %r' %
                                 arguments['ecosystem'])

        # make sure we store package name in its normalized form
        arguments['name'] = normalize_package_name(ecosystem.backend.name,
                                                   arguments['name'])

        if len(pattern_ignore.findall(arguments['version'])) > 0:
            self.log.info("Incorrect version alert {} {}".format(
                arguments['name'], arguments['version']))
            raise NotABugFatalTaskError("Incorrect version alert {} {}".format(
                arguments['name'], arguments['version']))

        # Dont try ingestion for private packages
        if is_pkg_public(arguments['ecosystem'], arguments['name']):
            self.log.info("Ingestion flow for {} {}".format(
                arguments['ecosystem'], arguments['name']))
        else:
            self.log.info("Private package ingestion ignored {} {}".format(
                arguments['ecosystem'], arguments['name']))
            raise NotABugFatalTaskError("Private package alert {} {}".format(
                arguments['ecosystem'], arguments['name']))

        p = Package.get_or_create(db,
                                  ecosystem_id=ecosystem.id,
                                  name=arguments['name'])
        v = Version.get_or_create(db,
                                  package_id=p.id,
                                  identifier=arguments['version'])

        if not arguments.get('force'):
            if db.query(Analysis).filter(
                    Analysis.version_id == v.id).count() > 0:
                arguments['analysis_already_exists'] = True
                self.log.debug(
                    "Arguments returned by initAnalysisFlow without force: {}".
                    format(arguments))
                return arguments

        cache_path = mkdtemp(dir=self.configuration.WORKER_DATA_DIR)
        epv_cache = ObjectCache.get_from_dict(arguments)
        npm_dir = self.configuration.NPM_DATA_DIR

        try:
            if not epv_cache.\
                    has_source_tarball():
                _, source_tarball_path = IndianaJones.fetch_artifact(
                    ecosystem=ecosystem,
                    artifact=arguments['name'],
                    version=arguments['version'],
                    target_dir=cache_path)
                epv_cache.put_source_tarball(source_tarball_path)

            if ecosystem.is_backed_by(EcosystemBackend.maven):
                if not epv_cache.has_source_jar():
                    try:
                        source_jar_path = self._download_source_jar(
                            cache_path, ecosystem, arguments)
                        epv_cache.put_source_jar(source_jar_path)
                    except Exception as e:
                        self.log.info(
                            'Failed to fetch source jar for maven artifact "{n}/{v}": {err}'
                            .format(n=arguments.get('name'),
                                    v=arguments.get('version'),
                                    err=str(e)))

                if not epv_cache.has_pom_xml():
                    pom_xml_path = self._download_pom_xml(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_pom_xml(pom_xml_path)
        finally:
            # always clean up cache
            shutil.rmtree(cache_path)
            if arguments['ecosystem'] == "npm":
                shutil.rmtree(npm_dir, True)

        a = Analysis(version=v,
                     access_count=1,
                     started_at=datetime.datetime.utcnow())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id

        # export ecosystem backend so we can use it to easily control flow later
        arguments['ecosystem_backend'] = ecosystem.backend.name

        self.log.debug(
            "Arguments returned by InitAnalysisFlow are: {}".format(arguments))
        return arguments