def test_execute(self, tmpdir):
        npm = Ecosystem(name='npm', backend=EcosystemBackend.npm)
        flexmock(self.m.storage).should_receive('get_ecosystem').with_args(
            'npm').and_return(npm)
        name = 'wrappy'
        version = '1.0.2'
        required = {
            'homepage', 'version', 'declared_license', 'code_repository',
            'bug_reporting', 'description', 'name', 'author'
        }
        IndianaJones.fetch_artifact(npm,
                                    artifact=name,
                                    version=version,
                                    target_dir=str(tmpdir))

        args = {'ecosystem': npm.name, 'name': 'foo', 'version': 'bar'}
        flexmock(EPVCache).should_receive(
            'get_extracted_source_tarball').and_return(str(tmpdir))
        results = self.m.execute(arguments=args)
        assert results is not None
        assert isinstance(results, dict)

        details = results['details'][0]
        assert required.issubset(set(
            details.keys()))  # check at least the required are there
        assert all([details[key]
                    for key in list(required)])  # assert required are not None
        assert details['name'] == name
def test_fetch_npm_latest(tmpdir):
    """Fetch the latest version of an npm module and verify it lands in the npm cache."""
    cache_path = subprocess.check_output(["npm", "config", "get", "cache"],
                                         universal_newlines=True).strip()
    # sanity check so the rmtree below can never hit an unexpected path
    assert ".npm" in cache_path
    module_cache_path = osp.join(cache_path, NPM_MODULE_NAME)

    # this could go really really bad if npm returns "/"
    shutil.rmtree(module_cache_path, ignore_errors=True)  # we don't care if it doesn't exist

    npm_url = "https://registry.npmjs.org/{}".format(NPM_MODULE_NAME)
    # NOTE: the original passed json=True to requests.get(); that kwarg serializes a
    # request *body* and has no effect on a plain GET, so it was dropped.
    response = requests.get(npm_url)
    if response.status_code != 200:
        # Give the remote service some time to catch a breath, then actually retry
        # once (the original only slept and re-raised without retrying).
        time.sleep(1)
        response = requests.get(npm_url)
    assert response.status_code == 200, response.text
    module_json = response.json()
    # NOTE(review): lexicographic sort is not semver-aware ("10.0.0" < "9.0.0");
    # this only works while the module's version strings happen to sort correctly.
    latest_version = sorted(module_json["versions"].keys()).pop()
    IndianaJones.fetch_artifact(npm,
                                artifact=NPM_MODULE_NAME, target_dir=str(tmpdir))
    assert len(glob.glob(osp.join(cache_path, NPM_MODULE_NAME, "*"))) == 1,\
        "there should be just one version of the artifact in the NPM cache"

    assert osp.exists(osp.join(module_cache_path, latest_version))
    assert osp.exists(osp.join(str(tmpdir), "package.tgz"))
# 示例 #3 (Example 3) — snippet separator from the scraped source collection; vote count: 0
    def test_execute(self, tmpdir):
        """Run the languages (linguist) task over an extracted tarball and check its output."""
        IndianaJones.fetch_artifact(
            ecosystem=ECOSYSTEM, artifact=MODULE_NAME,
            version=MODULE_VERSION, target_dir=str(tmpdir))

        flexmock(EPVCache).should_receive('get_extracted_source_tarball').and_return(str(tmpdir))
        task = LinguistTask.create_test_instance(task_name='languages')
        results = task.execute(dict.fromkeys(('ecosystem', 'name', 'version'), 'some-value'))

        assert results is not None
        assert isinstance(results, dict)
        assert set(results.keys()) == {'details', 'status', 'summary'}
        details = results['details']
        # tarball, setup.py, LICENSE, README, etc.
        assert len(details) > 3
        for entry in details:
            path = entry.get('path')
            if not (path and path.endswith('six.py')):
                continue
            # sample entry:
            # {'output': {'language': 'Python',
            #             'lines': '869',
            #             'mime': 'application/x-python',
            #             'sloc': '869',
            #             'type': 'Text'},
            #  'path': 'six-1.10.0/six.py',
            #  'type': ['Python script, ASCII text executable']},
            assert set(entry.keys()) == {'output', 'path', 'type'}
            assert set(entry['output'].keys()) == {'language', 'lines', 'mime', 'sloc', 'type'}
            assert entry['output']['language'] == 'Python'
            assert entry['type'].pop().startswith('Python')
        assert results['status'] == 'success'
def test_fetch_pypi_latest(tmpdir):
    """Fetch the latest PyPI release of a module and verify the artifact was downloaded.

    The original implementation used the XML-RPC ``package_releases`` call against
    pypi.python.org, which PyPI has deprecated and disabled; the JSON API
    (``/pypi/<project>/json``) is the supported replacement and reports the latest
    release under ``info.version``.
    """
    pypi_json_url = "https://pypi.org/pypi/{}/json".format(PYPI_MODULE_NAME)
    response = requests.get(pypi_json_url)
    assert response.status_code == 200, response.text
    latest_version = response.json()["info"]["version"]

    IndianaJones.fetch_artifact(pypi,
                                artifact=PYPI_MODULE_NAME, target_dir=str(tmpdir))

    assert len(os.listdir(str(tmpdir))) > 1
    glob_whl_path = glob.glob(osp.join(str(tmpdir),
                                       "{}-{}*".format(PYPI_MODULE_NAME, latest_version))).pop()
    assert osp.exists(glob_whl_path)
def test_fetch_rubygems_latest(tmpdir):
    """Fetch the latest rubygems release and verify the .gem file was downloaded."""
    rubygems_url = "https://rubygems.org/api/v1/versions/{}/latest.json".format(RUBYGEMS_MODULE_NAME)
    # NOTE: json=True (a request-body kwarg, meaningless on GET) was dropped, and the
    # "try again" comment is now an actual retry instead of a sleep followed by re-raise.
    response = requests.get(rubygems_url)
    if response.status_code != 200:
        # give the remote service some time to catch a breath, then retry once
        time.sleep(1)
        response = requests.get(rubygems_url)
    assert response.status_code == 200, response.text
    latest_version = response.json()["version"]
    IndianaJones.fetch_artifact(rubygems,
                                artifact=RUBYGEMS_MODULE_NAME, target_dir=str(tmpdir))

    assert osp.exists(osp.join(str(tmpdir), "{}-{}.gem".format(RUBYGEMS_MODULE_NAME,
                                                               latest_version)))
# 示例 #6 (Example 6) — snippet separator from the scraped source collection; vote count: 0
    def test_execute(self, tmpdir):
        """Run the digests task and verify its digest matches IndianaJones' and a recomputation."""
        artifact_digest, artifact_path = IndianaJones.fetch_artifact(
            Ecosystem(name='pypi', backend=EcosystemBackend.pypi),
            artifact=PYPI_MODULE_NAME,
            version=PYPI_MODULE_VERSION,
            target_dir=str(tmpdir))

        flexmock(EPVCache).should_receive(
            'get_extracted_source_tarball').and_return(str(tmpdir))
        flexmock(EPVCache).should_receive('get_source_tarball').and_return(
            artifact_path)
        task = DigesterTask.create_test_instance(task_name='digests')
        results = task.execute(
            arguments=dict.fromkeys(('ecosystem', 'name', 'version'), 'some-value'))

        assert results is not None
        assert isinstance(results, dict)
        assert set(results.keys()) == {'details', 'status', 'summary'}
        artifact_details = None
        for details in results['details']:
            assert {'sha256', 'sha1', 'md5', 'ssdeep', 'path'} <= set(details.keys())
            if details.get('artifact'):
                artifact_details = details
        # there must be an entry describing the artifact itself
        assert artifact_details is not None
        # Indy's digest, the task's digest and a fresh computation must all agree
        assert artifact_digest == artifact_details['sha256'] == compute_digest(artifact_path)
        assert artifact_details['path'] == 'six-1.0.0.tar.gz'
def test_fetch_maven_specific(tmpdir):
    """Fetch a pinned maven artifact and check its digest and jar location."""
    digest, _path = IndianaJones.fetch_artifact(maven,
                                                artifact=MAVEN_MODULE_NAME,
                                                version=MAVEN_MODULE_VERSION,
                                                target_dir=str(tmpdir))

    artifact_id = MAVEN_MODULE_NAME.split(':', 1)[1]

    assert digest == MAVEN_MODULE_DIGEST
    expected_jar = osp.join(str(tmpdir),
                            '{}-{}.jar'.format(artifact_id, MAVEN_MODULE_VERSION))
    assert osp.exists(expected_jar)
def test_fetch_rubygems_specific(tmpdir):
    """Fetch a pinned rubygems artifact and check its digest and .gem location."""
    digest, _path = IndianaJones.fetch_artifact(
        rubygems,
        artifact=RUBYGEMS_MODULE_NAME,
        version=RUBYGEMS_MODULE_VERSION, target_dir=str(tmpdir))

    assert digest == RUBYGEMS_MODULE_DIGEST
    gem_file = "{}-{}.gem".format(RUBYGEMS_MODULE_NAME, RUBYGEMS_MODULE_VERSION)
    assert osp.exists(osp.join(str(tmpdir), gem_file))
def test_fetch_pypi_specific(tmpdir):
    """Fetch a pinned pypi artifact and check its digest and downloaded files."""
    digest, _path = IndianaJones.fetch_artifact(
        pypi, artifact=PYPI_MODULE_NAME,
        version=PYPI_MODULE_VERSION, target_dir=str(tmpdir))

    assert digest == PYPI_MODULE_DIGEST
    assert len(os.listdir(str(tmpdir))) > 1
    pattern = osp.join(str(tmpdir),
                       "{}-{}*".format(PYPI_MODULE_NAME, PYPI_MODULE_VERSION))
    assert osp.exists(glob.glob(pattern).pop())
    def _download_pom_xml(target, ecosystem, arguments):
        """Fetch the pom of the artifact named in *arguments* into *target*.

        Returns the path of the downloaded pom after renaming it to 'pom.xml':
        mercator's Java handler, which runs maven as a subprocess, only
        recognizes that exact file name.
        """
        coords = MavenCoordinates.from_str(arguments['name'])
        coords.packaging = 'pom'
        coords.classifier = ''  # pom.xml files have no classifiers

        IndianaJones.fetch_artifact(
            ecosystem=ecosystem,
            artifact=coords.to_str(omit_version=True),
            version=arguments['version'],
            target_dir=target)

        downloaded_pom = os.path.join(
            target, '{}-{}.pom'.format(coords.artifactId, arguments['version']))
        pom_xml_path = os.path.join(target, 'pom.xml')
        os.rename(downloaded_pom, pom_xml_path)
        return pom_xml_path
def test_fetch_maven_latest(tmpdir):
    """Resolve the latest version from maven-metadata.xml and fetch that jar.

    Maven Central has rejected plain-HTTP requests with 501 since January 2020,
    so the metadata is fetched over HTTPS. The bytes are downloaded with
    ``requests`` and handed to ``etree.XML`` because lxml's ``etree.parse``
    cannot retrieve https:// URLs itself.
    """
    maven_central_url = 'https://repo1.maven.org/maven2'

    groupId, artifactId = MAVEN_MODULE_NAME.split(':', 1)
    groupId = groupId.replace('.', '/')

    # get maven-metadata.xml from the repository
    metadata_url = '{base}/{group}/{artifact}/maven-metadata.xml'.format(base=maven_central_url,
                                                                         group=groupId,
                                                                         artifact=artifactId)
    response = requests.get(metadata_url)
    assert response.status_code == 200, response.text
    meta = etree.XML(response.content)

    # get latest version
    version = meta.xpath('/metadata/versioning/latest')[0].text

    IndianaJones.fetch_artifact(maven,
                                artifact=MAVEN_MODULE_NAME,
                                version=None, target_dir=str(tmpdir))

    assert osp.exists(osp.join(str(tmpdir), '{}-{}.jar'.format(artifactId, version)))
def test_fetch_npm_specific(tmpdir, package, version, digest):
    """Fetch a pinned npm artifact and verify digest, cache contents and tarball."""
    cache_path = subprocess.check_output(["npm", "config", "get", "cache"],
                                         universal_newlines=True).strip()
    # sanity check before relying on anything under the npm cache
    assert ".npm" in cache_path

    package_digest, path = IndianaJones.fetch_artifact(
        npm, artifact=package,
        version=version, target_dir=tmpdir)

    cached_versions = glob.glob(osp.join(cache_path, package, "*"))
    assert len(cached_versions) == 1, \
        "there should be just one version of the artifact in the NPM cache"

    assert package_digest == digest
    assert osp.exists(path)
    assert osp.exists(osp.join(osp.join(cache_path, package), version))
    assert osp.exists(osp.join(tmpdir, "package.tgz"))
    def _download_source_jar(target, ecosystem, arguments):
        """Download the sources jar for the maven artifact named in *arguments*.

        Tries the known sources classifiers ('sources', then 'src') and returns
        the path of the first jar that downloads successfully; re-raises the
        fetch error only after every candidate has failed.

        Fixes a fall-through in the original: when the coordinates already
        carried a sources classifier, the whole body was skipped and the
        function silently returned None without fetching anything.
        """
        artifact_coords = MavenCoordinates.from_str(arguments['name'])
        sources_classifiers = ['sources', 'src']

        if artifact_coords.classifier in sources_classifiers:
            # coordinates already name a sources artifact - fetch exactly that
            candidates = [artifact_coords.classifier]
        else:
            candidates = sources_classifiers

        for sources_classifier in candidates:
            artifact_coords.classifier = sources_classifier
            try:
                _, source_jar_path = IndianaJones.fetch_artifact(
                    ecosystem=ecosystem,
                    artifact=artifact_coords.to_str(omit_version=True),
                    version=arguments['version'],
                    target_dir=target)
            except Exception:
                if sources_classifier == candidates[-1]:
                    # fetching of all variants failed
                    raise
            else:
                return source_jar_path
    def execute(self, arguments):
        """Initialize an analysis run for a single ecosystem/package/version (E/P/V).

        Ensures the Ecosystem/Package/Version rows exist, short-circuits when the
        version was already analysed (unless 'force' is set), pre-fetches the
        source tarball (plus source jar and pom.xml for maven) into the EPV
        cache, and finally records a new Analysis row.

        :param arguments: dict with at least 'ecosystem', 'name' and 'version';
                          may also carry 'force' and flow-control flags
        :return: the arguments dict - with E/P/V keys stripped when an analysis
                 already exists, otherwise with 'document_id' set to the id of
                 the newly created Analysis
        """
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))
        self._strict_assert(arguments.get('ecosystem'))

        db = self.storage.session
        e = Ecosystem.by_name(db, arguments['ecosystem'])
        p = Package.get_or_create(db,
                                  ecosystem_id=e.id,
                                  name=arguments['name'])
        v = Version.get_or_create(db,
                                  package_id=p.id,
                                  identifier=arguments['version'])

        if not arguments.get('force'):
            # TODO: this is OK for now, but if we will scale and there will be 2+ workers running this task
            # they can potentially schedule two flows of a same type at the same time
            if db.query(Analysis).filter(
                    Analysis.version_id == v.id).count() > 0:
                # we need to propagate flags that were passed to flow, but not E/P/V - this way we are sure that for
                # example graph import is scheduled (arguments['force_graph_sync'] == True)
                arguments.pop('name')
                arguments.pop('version')
                arguments.pop('ecosystem')
                return arguments

        cache_path = mkdtemp(dir=self.configuration.worker_data_dir)
        epv_cache = ObjectCache.get_from_dict(arguments)
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])

        try:
            if not epv_cache.has_source_tarball():
                _, source_tarball_path = IndianaJones.fetch_artifact(
                    ecosystem=ecosystem,
                    artifact=arguments['name'],
                    version=arguments['version'],
                    target_dir=cache_path)
                epv_cache.put_source_tarball(source_tarball_path)

            if ecosystem.is_backed_by(EcosystemBackend.maven):
                # a missing source jar is tolerated (logged below); a missing
                # pom.xml is not - _download_pom_xml errors propagate
                if not epv_cache.has_source_jar():
                    try:
                        source_jar_path = self._download_source_jar(
                            cache_path, ecosystem, arguments)
                        epv_cache.put_source_jar(source_jar_path)
                    # bound as 'exc', not 'e': 'except ... as e' would shadow the
                    # Ecosystem row bound above, and Python 3 deletes the bound
                    # name when the except block exits
                    except Exception as exc:
                        self.log.info(
                            'Failed to fetch source jar for maven artifact "{e}/{p}/{v}": {err}'
                            .format(e=arguments.get('ecosystem'),
                                    p=arguments.get('name'),
                                    v=arguments.get('version'),
                                    err=str(exc)))

                if not epv_cache.has_pom_xml():
                    pom_xml_path = self._download_pom_xml(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_pom_xml(pom_xml_path)
        finally:
            # always clean up cache
            shutil.rmtree(cache_path)

        a = Analysis(version=v,
                     access_count=1,
                     started_at=datetime.datetime.now())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id
        return arguments