Example #1
def _generate_pom_xml(to_solve):
    """
    Create pom.xml with dependencies from to_solve and run 'mvn versions:resolve-ranges',
    which resolves the version ranges (overwrites the pom.xml).

    :param to_solve: {"groupId:artifactId": "version-range"}
    """
    project = etree.Element('project')
    etree.SubElement(project, 'modelVersion').text = '4.0.0'
    etree.SubElement(project, 'groupId').text = 'foo.bar.baz'
    etree.SubElement(project, 'artifactId').text = 'testing'
    etree.SubElement(project, 'version').text = '1.0.0'
    dependencies = etree.SubElement(project, 'dependencies')
    for name, version_range in to_solve.items():
        group_id, artifact_id = name.rstrip(':').split(':')
        dependency = etree.SubElement(dependencies, 'dependency')
        etree.SubElement(dependency, 'groupId').text = group_id
        etree.SubElement(dependency, 'artifactId').text = artifact_id
        etree.SubElement(dependency, 'version').text = version_range
    with open('pom.xml', 'wb') as pom:
        pom.write(
            etree.tostring(project,
                           xml_declaration=True,
                           pretty_print=True))
    TimedCommand.get_command_output(['mvn', 'versions:resolve-ranges'],
                                    graceful=False)
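
A minimal invocation sketch (the coordinates and range below are illustrative; assumes `from lxml import etree` and `mvn` available on PATH):

_generate_pom_xml({'org.slf4j:slf4j-api': '[1.7.0,1.7.25]'})
# pom.xml now contains the concrete version that Maven resolved from the range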
def zip_file(file, archive, junk_paths=False):
    """Zip a single file into archive."""
    command = ['zip', archive, file]
    if junk_paths:
        # Store just the name of a saved file (junk the path), not directory names.
        # By default, zip will store the full path (relative to the current directory).
        command.extend(['--junk-paths'])
    TimedCommand.get_command_output(command)
    def archive(self, basename):
        """Create a tar.gz archive of HEAD and return its file name."""
        suffix = "tar.gz"
        filename = basename + "." + suffix
        TimedCommand.get_command_output([
            "git", "archive", "--format={}".format(suffix),
            "--output={}".format(filename), "HEAD"
        ])
        return filename
Example #4
    def add(self, path):
        """
        add path to index

        :param path: str
        """
        with cwd(self.repo_path):
            TimedCommand.get_command_output(["git", "add", path], graceful=False)
Example #5
def extract_zip(target, dest, mkdest=False):
    if mkdest:
        try:
            os.mkdir(dest, mode=0o775)
        except FileExistsError:
            pass
    # -o: overwrite existing files without prompting
    TimedCommand.get_command_output(['unzip', '-o', '-d', dest, target])
    # Fix possibly wrong permissions in zip files that would prevent us from deleting files.
    TimedCommand.get_command_output(['chmod', '-R', 'u+rwX,g+rwX', dest])
Example #6
    def commit(self, message='blank'):
        """
        commit git repository

        :param message: str, commit message
        """
        # --git-dir alone doesn't work as expected, so chdir into the repo instead:
        # http://stackoverflow.com/questions/1386291/git-git-dir-not-working-as-expected
        with cwd(self.repo_path):
            TimedCommand.get_command_output(["git", "commit", "-m", message], graceful=False)
    def create_git(cls, path):
        """
        Initialize a new git repository at path.

        :param path: str
        :return: instance of Git()
        """
        cls.config()
        TimedCommand.get_command_output(["git", "init", path], graceful=False)
        return cls(path=path)
    def clone(cls, url, path):
        """
        Clone the repository provided as url into path.

        :param url: str
        :param path: str
        :return: instance of Git()
        """
        cls.config()
        TimedCommand.get_command_output(["git", "clone", url, path],
                                        graceful=False)
        return cls(path=path)
    def _use_maven_index_checker(self):
        maven_index_checker_dir = os.getenv('MAVEN_INDEX_CHECKER_PATH')
        target_dir = os.path.join(maven_index_checker_dir, 'target')

        s3 = StoragePool.get_connected_storage('S3MavenIndex')
        self.log.info('Fetching pre-built maven index from S3, if available.')
        s3.retrieve_index_if_exists(target_dir)

        index_range = '{}-{}'.format(self.count.min, self.count.max)
        command = [
            'java', '-Xmx768m', '-jar', 'maven-index-checker.jar', '-r',
            index_range
        ]
        with cwd(maven_index_checker_dir):
            output = TimedCommand.get_command_output(command,
                                                     is_json=True,
                                                     graceful=False,
                                                     timeout=1200)
            for idx, release in enumerate(output):
                name = '{}:{}'.format(release['groupId'],
                                      release['artifactId'])
                version = release['version']
                self.log.info("Scheduling #%d.", self.count.min + idx)
                self.analyses_selinon_flow(name, version)
        # index checker should clean up these dirs in the system temp dir after itself, but better be sure
        for mindexerdir in glob.glob(
                os.path.join(gettempdir(), 'mindexer-ctxcentral-context*')):
            rmtree(mindexerdir)

        self.log.info('Storing pre-built maven index to S3')
        s3.store_index(target_dir)
        central_index_dir = os.path.join(target_dir, 'central-index')
        rmtree(central_index_dir)
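
The loop above assumes maven-index-checker emits a JSON array of release objects; the field names come from the code, the values are illustrative:

# [
#     {"groupId": "org.apache.commons", "artifactId": "commons-lang3", "version": "3.4"},
#     {"groupId": "junit", "artifactId": "junit", "version": "4.12"}
# ]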
Example #10
    def add_and_commit_everything(self, message="blank"):
        """
        Equivalent of:

            git add .
            git commit -m <message>

        :param message: str, commit message
        """
        # First we need to remove any .git dirs/files from the archive; they could contain
        #  config that would break adding (e.g. Flask 0.10 contains .git with gitpath
        #  pointing to Mitsuhiko's home dir).
        TimedCommand.get_command_output(['find', self.repo_path, '-mindepth', '2', '-name', '.git',
                                        '-exec', 'rm', '-rf', '{}', ';'])
        # add everything
        self.add(self.repo_path)
        self.commit(message=message)
        def worker(path):
            mime = TimedCommand.get_command_output(['file', path, '-b',
                                                    '-i']).pop()
            self.log.debug("%s mime = %s", path, mime)
            typ = TimedCommand.get_command_output(['file', path, '-b']).pop()
            self.log.debug("%s filetype = %s", path, typ)

            linguist = None
            if 'charset=binary' not in mime:
                linguist = self._parse_linguist(
                    TimedCommand.get_command_output(['linguist', path]))
                self.log.debug("%s linguist output = %s", path, linguist)

            results.append({
                "type": typ,
                "output": linguist,
                "path": os.path.relpath(path, cache_path),
            })
Example #12
    def clone(cls, url, path, depth=None, branch=None):
        """
        Clone the repository provided as url into path.

        :param url: str
        :param path: str
        :param depth: str
        :param branch: str
        :return: instance of Git()
        """
        cls.config()
        cmd = ["git", "clone", url, path]
        if depth is not None:
            cmd.extend(["--depth", depth])
        if branch is not None:
            cmd.extend(["--branch", branch])
        TimedCommand.get_command_output(cmd, graceful=False)
        return cls(path=path)
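
For example, a shallow single-branch clone could look like this (URL and path are illustrative; depth is passed as a string so the whole argv stays str):

repo = Git.clone('https://github.com/some/project.git', '/tmp/project',
                 depth='1', branch='master')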
    def compute_ssdeep(self, target):
        """Compute ssdeep context-triggered piecewise (fuzzy) hash of target."""
        # `ssdeep -c -s` prints CSV:
        # line 0 : ssdeep header
        # line 1 : hash,filename
        data = TimedCommand.get_command_output(['ssdeep', '-c', '-s', target])
        try:
            return data[1].split(',')[0].strip()
        except IndexError:
            self.log.error("unable to compute ssdeep of %r", target)
            raise RuntimeError("can't compute digest of %r" % target)
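
The data[1] indexing above relies on the CSV output of `ssdeep -c -s`: a header line followed by one hash,filename row, roughly (values illustrative):

# data[0] == 'ssdeep,1.1--blocksize:hash:hash,filename'
# data[1] == '3072:kAl9pQs...Hq:kZtQs...Hq,"/tmp/archive.tar.gz"'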
Example #14
    def rev_parse(self, args=None):
        """
        :param args: arguments to pass to `git rev-parse`

        :return: [str], output from `git rev-parse`
        """

        cmd = ["git", "rev-parse"]
        if args:
            cmd.extend(args)

        with cwd(self.repo_path):
            return TimedCommand.get_command_output(cmd, graceful=False)
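
A usage sketch (arguments and outputs are illustrative):

# repo.rev_parse(['HEAD'])                 -> ['e3a1f6b0c4...']
# repo.rev_parse(['--abbrev-ref', 'HEAD']) -> ['master']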
    def _get_snyk_vulndb(self):
        """
        :return: retrieve Snyk CVE db
        """

        with tempdir() as vulndb_dir:
            # clone vulndb git repo
            self.log.debug("Cloning snyk/vulndb repo")
            Git.clone(self._VULNDB_GIT_REPO, vulndb_dir)
            with cwd(vulndb_dir):
                # install dependencies
                self.log.debug("Installing snyk/vulndb dependencies")
                TimedCommand.get_command_output(['npm', 'install'])
                # generate database (json in file)
                self.log.debug("Generating snyk/vulndb")
                TimedCommand.get_command_output([
                    os.path.join('cli', 'shrink.js'), 'data',
                    self._VULNDB_FILENAME
                ])
                # parse the JSON so we are sure that we have a valid JSON
                with open(self._VULNDB_FILENAME) as f:
                    return json.load(f)
Example #16
def config():
    """
    Configure git user.name and user.email globally if they are not set yet.
    """
    user_name = configuration.git_user_name
    user_email = configuration.git_user_email
    if not TimedCommand.get_command_output(["git", "config", "--get", "user.name"]):
        TimedCommand.get_command_output(["git", "config", "--global", "user.name", user_name])
    if not TimedCommand.get_command_output(["git", "config", "--get", "user.email"]):
        TimedCommand.get_command_output(["git", "config", "--global", "user.email", user_email])
Example #17
def extract_gem(target, dest):
    """
    Extract target gem into $dest/sources and its gemspec
    (renamed to rubygems-metadata.yaml) into $dest/metadata/.
    """
    sources = os.path.join(dest, 'sources')
    metadata = os.path.join(dest, 'metadata')
    TimedCommand.get_command_output(['mkdir', '-p', sources, metadata])
    TimedCommand.get_command_output(['gem', 'unpack', target, '--target', sources])
    with cwd(metadata):
        # `gem unpack --spec` ignores --target, so we need to cwd
        TimedCommand.get_command_output(['gem', 'unpack', target, '--spec'])
        metadatayaml = glob.glob('*.gemspec').pop()
        os.rename(metadatayaml, 'rubygems-metadata.yaml')
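
The resulting layout, sketched for a hypothetical gem:

# extract_gem('rails-5.0.0.gem', '/tmp/out') produces:
#   /tmp/out/sources/rails-5.0.0/...          (unpacked gem contents)
#   /tmp/out/metadata/rubygems-metadata.yaml  (the renamed gemspec)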
    def execute(self, arguments):
        """
        task code

        :param arguments: dictionary with arguments
        :return: {}, results
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        try:
            cache_path = ObjectCache.get_from_dict(arguments).get_sources()
        except Exception:
            eco = arguments.get('ecosystem')
            pkg = arguments.get('name')
            ver = arguments.get('version')
            if arguments['ecosystem'] != 'maven':
                self.log.error(
                    'Could not get sources for package {e}/{p}/{v}'.format(
                        e=eco, p=pkg, v=ver))
                raise
            self.log.info('Could not get sources for maven package {p}/{v}, '
                          'will try to run on binary jar'.format(p=pkg, v=ver))
            cache_path = ObjectCache.get_from_dict(
                arguments).get_extracted_source_tarball()

        result_data = {'status': 'unknown', 'summary': {}, 'details': {}}
        try:
            result_data['details'] = TimedCommand.get_command_output(
                ['license_check.py', cache_path], graceful=False, is_json=True)
            result_data['status'] = result_data['details'].pop('status')
            result_data['summary'] = result_data['details'].pop('summary')
        except Exception:
            self.log.exception("License scan failed")
            result_data['status'] = 'error'

        return result_data
Example #19
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        cache_path = ObjectCache.get_from_dict(arguments).get_extracted_source_tarball()

        results = {'status': 'unknown',
                   'summary': {},
                   'details': []}

        try:
            oscc = TimedCommand.get_command_output(['oscryptocatcher', '--subdir-in-result', cache_path],
                                                   graceful=False, is_json=True)

            self.log.debug("oscryptocatcher %s output: %s", cache_path, oscc)
            results['details'] = oscc['details']
            results['summary'] = oscc['summary']
            results['status'] = 'success'
        except Exception:
            results['status'] = 'error'

        return results
Example #20
    def execute(self, arguments):
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        try:
            cache_path = ObjectCache.get_from_dict(arguments).get_sources()
        except Exception:
            eco = arguments.get('ecosystem')
            pkg = arguments.get('name')
            ver = arguments.get('version')
            if arguments['ecosystem'] != 'maven':
                self.log.error(
                    'Could not get sources for package {e}/{p}/{v}'.format(
                        e=eco, p=pkg, v=ver))
                raise
            self.log.info('Could not get sources for maven package {p}/{v}, '
                          'will try to run on binary jar'.format(p=pkg, v=ver))
            cache_path = ObjectCache.get_from_dict(
                arguments).get_extracted_source_tarball()

        result_data = {'status': 'unknown', 'summary': {}, 'details': {}}
        try:
            command = [
                os.path.join(
                    os.getenv('SCANCODE_PATH', '/opt/scancode-toolkit/'),
                    'scancode'),
                # Scan for licenses
                '--license',
                # Do not return license matches with scores lower than this score
                '--license-score',
                SCANCODE_LICENSE_SCORE,
                # Files without findings are omitted
                '--only-findings',
                # Use n parallel processes
                '--processes',
                SCANCODE_PROCESSES,
                # Do not print summary or progress messages
                '--quiet',
                # Strip the root directory segment of all paths
                '--strip-root',
                # Stop scanning a file if scanning takes longer than a timeout in seconds
                '--timeout',
                SCANCODE_TIMEOUT,
                cache_path
            ]
            output = TimedCommand.get_command_output(command,
                                                     graceful=False,
                                                     is_json=True,
                                                     timeout=600)
            details = self.process_output(output)
            result_data['details'] = details
            result_data['status'] = 'success'
            result_data['summary'] = {
                'sure_licenses': list(details['licenses'].keys())
            }
        except Exception:
            self.log.exception("License scan failed")
            result_data['status'] = 'error'

        return result_data
Example #21
    def _run_owasp_dep_check(self, scan_path, experimental=False):
        def _clean_dep_check_tmp():
            for dcdir in glob.glob(os.path.join(gettempdir(), 'dctemp*')):
                rmtree(dcdir)

        s3 = StoragePool.get_connected_storage('S3OWASPDepCheck')
        depcheck = os.path.join(os.environ['OWASP_DEP_CHECK_PATH'], 'bin', 'dependency-check.sh')
        with tempdir() as temp_data_dir:
            retrieved = s3.retrieve_depcheck_db_if_exists(temp_data_dir)
            if not retrieved:
                self.log.debug('No cached OWASP Dependency-Check DB, generating fresh now ...')
                command = [depcheck, '--updateonly', '--data', temp_data_dir]
                # give DependencyCheck 30 minutes to download the DB
                TimedCommand.get_command_output(command, graceful=False, timeout=1800)
            report_path = os.path.join(temp_data_dir, 'report.xml')
            command = [depcheck,
                       '--noupdate',
                       '--format', 'XML',
                       '--project', 'test',
                       '--data', temp_data_dir,
                       '--scan', scan_path,
                       '--out', report_path]
            if experimental:
                command.extend(['--enableExperimental'])
            output = []
            try:
                self.log.debug('Running OWASP Dependency-Check to scan %s for vulnerabilities',
                               scan_path)
                output = TimedCommand.get_command_output(command,
                                                         graceful=False,
                                                         timeout=600)  # 10 minutes
                with open(report_path) as r:
                    report_dict = anymarkup.parse(r.read())
            except (TaskError, FileNotFoundError) as e:
                _clean_dep_check_tmp()
                for line in output:
                    self.log.warning(line)
                self.log.exception(str(e))
                return {'summary': ['OWASP Dependency-Check scan failed'],
                        'status': 'error',
                        'details': []}
            # If the CVEDBSyncTask has never been run before, we just had to create the DB ourselves
            # Make the life easier for other workers and store it to S3
            s3.store_depcheck_db_if_not_exists(temp_data_dir)
            _clean_dep_check_tmp()

        results = []
        dependencies = report_dict.get('analysis', {}).get('dependencies', {}).get('dependency', [])
        if not isinstance(dependencies, list):
            dependencies = [dependencies]
        for dependency in dependencies:
            vulnerabilities = dependency.get('vulnerabilities', {}).get('vulnerability', [])
            if not isinstance(vulnerabilities, list):
                vulnerabilities = [vulnerabilities]
            for vulnerability in vulnerabilities:
                av = vulnerability.get('cvssAccessVector')
                av = av[0] if av else '?'
                ac = vulnerability.get('cvssAccessComplexity')
                ac = ac[0] if ac else '?'
                au = vulnerability.get('cvssAuthentication')
                au = au[0] if au else '?'
                c = vulnerability.get('cvssConfidentialImpact')
                c = c[0] if c else '?'
                i = vulnerability.get('cvssIntegrityImpact')
                i = i[0] if i else '?'
                a = vulnerability.get('cvssAvailabilityImpact')
                a = a[0] if a else '?'
                vector = "AV:{AV}/AC:{AC}/Au:{Au}/C:{C}/I:{I}/A:{A}".\
                    format(AV=av, AC=ac, Au=au, C=c, I=i, A=a)
                result = {
                    'cvss': {
                        'score': vulnerability.get('cvssScore'),
                        'vector': vector
                    }
                }
                references = vulnerability.get('references', {}).get('reference', [])
                if not isinstance(references, list):
                    references = [references]
                result['references'] = [r.get('url') for r in references]
                for field in ['severity', 'description']:
                    result[field] = vulnerability.get(field)
                result['id'] = vulnerability.get('name')
                results.append(result)

        return {'summary': [r['id'] for r in results],
                'status': 'success',
                'details': results}
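
The vector string is assembled from the first letter of each CVSS v2 field, so a report entry like the following (values illustrative) collapses as shown:

# {'cvssScore': '7.5', 'cvssAccessVector': 'NETWORK', 'cvssAccessComplexity': 'LOW',
#  'cvssAuthentication': 'NONE', 'cvssConfidentialImpact': 'PARTIAL',
#  'cvssIntegrityImpact': 'PARTIAL', 'cvssAvailabilityImpact': 'PARTIAL'}
# -> vector == "AV:N/AC:L/Au:N/C:P/I:P/A:P"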
    def fetch_artifact(ecosystem=None,
                       artifact=None,
                       version=None,
                       target_dir='.'):
        """
        download artifact from registry and process it

        :param ecosystem:
        :param artifact:
        :param version:
        :param target_dir:
        :return: tuple: (digest, artifact_path)
        """
        parsed = urlparse(artifact)
        digest = None
        artifact_path = None

        if ecosystem.is_backed_by(EcosystemBackend.pypi):
            git = Git.create_git(target_dir)
            # NOTE: we can't download Python packages via pip, because it runs setup.py
            #  even with `pip download`. Therefore we could always get syntax errors
            #  because of older/newer syntax.
            res = requests.get(
                'https://pypi.python.org/pypi/{a}/json'.format(a=artifact))
            res.raise_for_status()
            if not version:
                version = res.json()['info']['version']
            release_files = res.json()['releases'][version]

            # sort releases by order in which we'd like to download:
            #  1) sdist
            #  2) wheels
            #  3) eggs
            #  4) anything else (creepy stuff)
            def release_key(rel):
                return {
                    'sdist': 0,
                    'bdist_wheel': 1,
                    'bdist_egg': 2
                }.get(rel['packagetype'], 3)

            release_files = sorted(release_files, key=release_key)
            file_url = release_files[0]['url']
            local_filename = IndianaJones.download_file(file_url, target_dir)
            artifact_path = os.path.join(target_dir, local_filename)
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.npm):
            git = Git.create_git(target_dir)

            # $ npm config get cache
            # /root/.npm
            cache_path = TimedCommand.get_command_output(
                ['npm', 'config', 'get', 'cache'], graceful=False).pop()

            # add package to cache:
            # /root/.npm/express/
            # └── 4.13.4
            #      ├── package
            #      │   ├── History.md
            #      │   ├── index.js
            #      │   ├── lib
            #      │   ├── LICENSE
            #      │   ├── package.json
            #      │   └── Readme.md
            #      └── package.tgz
            # 3 directories, 6 files
            name_ver = artifact
            if version:
                name_ver = "{}@{}".format(artifact, version)
            # make sure the artifact is not in the cache yet
            TimedCommand.get_command_output(
                ['npm', 'cache', 'clean', artifact], graceful=False)
            logger.info("downloading npm module %s", name_ver)
            npm_command = ['npm', 'cache', 'add', name_ver]
            TimedCommand.get_command_output(npm_command, graceful=False)

            # copy tarball to workpath
            tarball_name = "package.tgz"
            glob_path = os.path.join(cache_path, artifact, "*")
            cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
            artifact_path = os.path.join(cache_abs_path, tarball_name)
            logger.debug("[cache] tarball path = %s", artifact_path)
            artifact_path = shutil.copy(artifact_path, target_dir)

            logger.debug("[workdir] tarball path = %s", artifact_path)
            # Prior to npm-2.x.x (Fedora 24), the npm client repackaged modules on
            # download. It modified file permissions inside package.tgz so they matched
            # the UID/GID of the user running npm. Therefore its digest differed from
            # that of a tarball downloaded directly from registry.npmjs.org.
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)

            # copy package/package.json over the extracted one,
            # because it contains (since npm >= 2.x.x) more information.
            npm_package_json = os.path.join(cache_abs_path, 'package',
                                            'package.json')
            shutil.copy(npm_package_json, target_dir)
            # copy package/npm-shrinkwrap.json to target_dir
            npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                               'npm-shrinkwrap.json')
            if os.path.isfile(npm_shrinkwrap_json):
                shutil.copy(npm_shrinkwrap_json, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.rubygems):
            git = Git.create_git(target_dir)
            logger.info("downloading rubygems package %s-%s", artifact,
                        version)
            version_arg = []
            if version:
                version_arg = ['--version', version]
            gem_command = ['gem', 'fetch', artifact]
            gem_command.extend(version_arg)
            with cwd(target_dir):
                TimedCommand.get_command_output(gem_command, graceful=False)

            if not version:
                # if version is None we need to glob for the version that was downloaded
                artifact_path = os.path.abspath(
                    glob.glob(os.path.join(target_dir, artifact + '*')).pop())
            else:
                artifact_path = os.path.join(
                    target_dir, '{n}-{v}.gem'.format(n=artifact, v=version))

            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.maven):
            git = Git.create_git(target_dir)
            artifact_coords = MavenCoordinates.from_str(artifact)
            # lxml can't handle HTTPS URLs
            maven_url = "http://repo1.maven.org/maven2/"
            if not version:
                version = mvn_find_latest_version(maven_url, artifact_coords)
            artifact_coords.version = version
            logger.info("downloading maven package %s",
                        artifact_coords.to_str())

            if not artifact_coords.is_valid():
                raise ValueError("Invalid Maven coordinates: {a}".format(
                    a=artifact_coords.to_str()))

            artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
            local_filename = IndianaJones.download_file(
                artifact_url, target_dir)
            if local_filename is None:
                raise RuntimeError("Unable to download: %s" % artifact_url)
            artifact_path = os.path.join(
                target_dir,
                os.path.split(artifact_coords.to_repo_url())[1])
            digest = compute_digest(artifact_path)
            if artifact_coords.packaging != 'pom':
                Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.scm):
            git = Git.clone(artifact, target_dir)
            digest = IndianaJones.get_revision(target_dir)
            artifact_path = git.archive(artifact)
        elif parsed:
            # no known ecosystem backend matched; fall back to git for git-like URLs
            if parsed[0] == 'git' or parsed[2].endswith('.git'):
                git = Git.clone(artifact, target_dir)
                digest = IndianaJones.get_revision(target_dir)
                artifact_path = git.archive(artifact)

        return digest, artifact_path
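
A typical call, sketched (the ecosystem object and package are illustrative):

# digest, path = IndianaJones.fetch_artifact(
#     ecosystem=npm_ecosystem, artifact='serve-static',
#     version='1.7.1', target_dir='/tmp/serve-static')
# digest -> output of compute_digest() on the downloaded tarball
# path   -> '/tmp/serve-static/package.tgz' for the npm backend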
def get_revision(target_directory):
    """Get digest (SHA) of the last commit."""
    with cwd(target_directory):
        return TimedCommand.get_command_output(
            ['git', 'rev-parse', 'HEAD'], graceful=False).pop()
def extract_tar(target, dest):
    TimedCommand.get_command_output(['tar', 'xf', target, '-C', dest])
def extract_zip(target, dest):
    # -o: overwrite existing files without prompting
    TimedCommand.get_command_output(['unzip', '-o', '-d', dest, target])
    # Fix possibly wrong permissions in zip files that would prevent us from deleting files.
    TimedCommand.get_command_output(['chmod', '-R', 'u+rwX,g+rwX', dest])
    def _update_dep_check_db(self, data_dir):
        depcheck = os.path.join(os.environ['OWASP_DEP_CHECK_PATH'], 'bin',
                                'dependency-check.sh')
        self.log.debug('Updating OWASP Dependency-Check CVE DB')
        # give the updater up to 30 minutes to download the CVE DB
        TimedCommand.get_command_output(
            [depcheck, '--updateonly', '--data', data_dir], timeout=1800)