Exemplo n.º 1
0
    def _build_from_git(cls):
        """Build the database from upstream GitHub and our own.

        We do this before we contribute back to Victims.

        Both repositories are cloned into throw-away directories and archived
        as ZIP files; every entry of the CveDB archive is then appended to the
        Victims archive and the merged archive is copied to a fresh temporary
        file.

        :return: path of the merged ZIP file (created with
            ``tempfile.mkstemp``); the file is not removed by this method
        :raises Exception: any error raised while copying is re-raised after
            the target file has been removed
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            # Clone VictimsDB and create a ZIP out of it
            with tempfile.TemporaryDirectory() as tf:
                git = Git.clone(VICTIMS_URL, path=tf, single_branch=True)
                victims_zip_path = git.archive(basename='victims',
                                               basedir=temp_dir,
                                               format='zip')

            # Clone f8a CveDB and create a ZIP out of it
            with tempfile.TemporaryDirectory() as tf:
                git = Git.clone(F8A_CVEDB_URL, path=tf, single_branch=True)
                cvedb_zip_path = git.archive(basename='cvedb',
                                             basedir=temp_dir,
                                             format='zip')

            # Merge the two ZIP files; both archives are context-managed so
            # the read-only one is closed as well.
            with zipfile.ZipFile(victims_zip_path, 'a') as victims_zip, \
                    zipfile.ZipFile(cvedb_zip_path, 'r') as cvedb_zip:
                for name in cvedb_zip.namelist():
                    # ZipFile.read() opens and closes the member internally
                    victims_zip.writestr(name, cvedb_zip.read(name))

            # mkstemp() hands back an open descriptor together with the path;
            # only the path is needed, so close the descriptor right away.
            fd, db_path = tempfile.mkstemp(prefix='victims-db-', suffix='.zip')
            os.close(fd)
            try:
                # Copy the uber-ZIP to the target location
                shutil.copyfile(victims_zip_path, db_path)
                return db_path
            except Exception:
                # do not leave a half-written target file behind
                os.remove(db_path)
                raise
Exemplo n.º 2
0
    def get_files_github_url(self, github_url):
        """Fetch manifest files from a GitHub repository by cloning it.

        :param github_url: URL of the GitHub repository
        :return: list of dicts with "filename", "content" and "filepath"
            keys; the list is empty when the repository page does not answer
            with HTTP 200
        :raises HTTPError: with status 500 when any step inside the guarded
            section fails
        """
        collected = []
        repo_suffix = parse_gh_repo(github_url)
        try:
            # the cleanup helper runs up front and again in `finally`
            self.del_temp_files()
            probe = get(urljoin(self.PREFIX_URL, repo_suffix))
            if probe.status_code != 200:
                return collected

            # NOTE(review): if `urljoin` is urllib.parse.urljoin, the third
            # positional argument is `allow_fragments`, so '.git' would not
            # be appended to the URL - confirm which urljoin is imported.
            clone_url = urljoin(self.PREFIX_GIT_URL, repo_suffix, '.git')
            Git.clone(clone_url, self.CLONED_DIR)
            for entry in self.get_manifest_files():
                name = entry.get('filename')
                location = entry.get('filepath')
                with open(location, 'rb') as handle:
                    text = handle.read().decode('utf-8')
                collected.append({
                    "filename": name,
                    "content": text,
                    # report the path without the clone directory prefix
                    "filepath": location.replace(self.CLONED_DIR, ''),
                })
        except Exception:
            raise HTTPError(500, "Error in reading repo from github.")
        finally:
            self.del_temp_files()

        return collected
Exemplo n.º 3
0
 def update_victims_cve_db_on_s3():
     """Clone the Victims CVE DB repository and hand the checkout to the S3VulnDB storage."""
     victims_repo = 'https://github.com/victims/victims-cve-db.git'
     vuln_db_storage = StoragePool.get_connected_storage('S3VulnDB')
     with TemporaryDirectory() as checkout_dir:
         # depth is passed as the string "1"
         Git.clone(victims_repo, checkout_dir, depth="1")
         vuln_db_storage.store_victims_db(checkout_dir)
 def test_clone(self, tmpdir, url, ok):
     """Check Git.clone() for the success case and the expected-failure case."""
     target = str(tmpdir)
     if not ok:
         # cloning is expected to fail with TaskError
         with pytest.raises(TaskError):
             Git.clone(url, target)
         return

     Git.clone(url, target)
     checkout = Path(target)
     assert (checkout / '.git').is_dir()
     assert (checkout / 'README.md').is_file()
Exemplo n.º 5
0
    def run_mercator_on_git_repo(self, arguments):
        """Run mercator over a depth-1 clone of the given git repository.

        :param arguments: task arguments; the 'url' entry is asserted to be set
        :return: mercator metadata whose detail paths were rewritten to
            ``<HEAD revision>/path/to/manifest.file``, or None when mercator
            does not report success
        """
        self._strict_assert(arguments.get('url'))

        with TemporaryDirectory() as checkout_dir:
            git_url = arguments.get('url')
            cloned = Git.clone(git_url, path=checkout_dir, depth='1')
            result = self.run_mercator(arguments,
                                       checkout_dir,
                                       keep_path=True,
                                       outermost_only=False,
                                       timeout=900)
            status = result.get('status', None)
            if status != 'success':
                self.log.error('Mercator failed on %s', git_url)
                return None

            # Replace the checkout directory prefix of every reported path
            # with the HEAD revision so the manifest can be located later:
            #   <git-sha1>/path/to/manifest.file
            head_rev = cloned.rev_parse(['HEAD'])[0]
            prefix_len = len(checkout_dir)
            for entry in result['details']:
                entry['path'] = head_rev + entry['path'][prefix_len:]

            return result
Exemplo n.º 6
0
    def fetch_releases(self, package):
        """Fetch package releases versions.

        The package name is turned into a git URL and the first field that
        ``Git.ls_remote`` reports for ``HEAD`` is returned as the only
        version.

        :param package: ``host/org/project`` style name, or a two-part
            ``gopkg.in/<project>`` name
        :return: tuple ``(package, [version])``
        :raises ValueError: if the package is empty, cannot be mapped to a
            repository URL, or nothing is reported for HEAD
        """
        if not package:
            raise ValueError('package not specified')

        parts = package.split("/")[:3]
        if len(parts) == 3:
            # this assumes github.com/org/project like structure
            host, org, proj = parts
            repo_url = 'git://{host}/{org}/{proj}.git'.format(host=host,
                                                              org=org,
                                                              proj=proj)
        elif len(parts) == 2 and parts[0] == 'gopkg.in':
            # specific to gopkg.in/packages
            host, proj = parts
            repo_url = 'https://{host}/{proj}.git'.format(host=host, proj=proj)
        else:
            raise ValueError(
                "Package {} is invalid git repository".format(package))

        output = Git.ls_remote(repo_url, args=['-q'], refs=['HEAD'])
        # An empty listing (or a blank first line) means there is no version
        # to report; raise the documented ValueError instead of letting an
        # IndexError / unpacking error escape.
        fields = output[0].split() if output else []
        if not fields:
            raise ValueError(
                "Package {} does not have associated versions".format(package))

        return package, [fields[0]]
Exemplo n.º 7
0
    def extract_dependencies(github_repo, github_sha):
        """Collect dependency information for a repository at a given commit.

        The repository is cloned, hard-reset to the commit, and the manifest
        found in its root decides which extractor runs (maven, npm, python
        or Go).

        :param github_repo: repository url
        :param github_sha: commit hash
        :return: set of direct (and indirect) dependencies
        :raises TaskError: when none of the known manifest files is found
        """
        def _in_root(filename):
            # peek() at the glob for `filename` in the current working directory
            return peek(Path.cwd().glob(filename))

        with TemporaryDirectory() as checkout_dir:
            cloned = Git.clone(url=github_repo, path=checkout_dir, timeout=3600)
            cloned.reset(revision=github_sha, hard=True)
            with cwd(cloned.repo_path):
                # TODO: Make this task also work for files not present in root directory.

                # package-lock.json is turned into npm-shrinkwrap.json first
                GithubDependencyTreeTask.change_package_lock_to_shrinkwrap()

                if _in_root("pom.xml"):
                    return GithubDependencyTreeTask.get_maven_dependencies()
                if _in_root("npm-shrinkwrap.json") or _in_root("package.json"):
                    return GithubDependencyTreeTask.get_npm_dependencies(
                        cloned.repo_path)
                if _in_root("requirements.txt"):
                    return GithubDependencyTreeTask.get_python_dependencies(
                        cloned.repo_path)
                if _in_root("glide.lock"):
                    return GithubDependencyTreeTask.get_go_glide_dependencies(
                        cloned.repo_path)
                if _in_root("Gopkg.lock"):
                    return GithubDependencyTreeTask.get_go_pkg_dependencies()

                raise TaskError("Please provide maven or npm or "
                                "python or Go repository for scanning!")
    def _get_log(url):
        """Clone the repository into a temporary directory and return its log.

        :param url: url to the git repo
        :return: whatever ``log()`` of the cloned repository yields
        """
        with TemporaryDirectory() as checkout_dir:
            # nice notebook to check at:
            #   http://nbviewer.jupyter.org/github/tarmstrong/code-analysis/blob/master/IPythonReviewTime.ipynb
            return Git.clone(url, checkout_dir).log()
 def test_git_add_and_commit_everything_with_dotgit(self, tmpdir):
     """Check Git.add_and_commit_everything() with a nested `.git` file present."""
     root = Path(str(tmpdir))
     # a .git file somewhere inside the tree must not make adding fail
     subprocess.check_output(['git', 'init', str(root)],
                             universal_newlines=True)
     nested = root / 'foo'
     nested.mkdir(parents=True)
     (nested / '.git').touch()
     # git needs at least one regular file to commit
     (nested / 'foo').touch()
     repo = Git.create_git(str(root))
     repo.add_and_commit_everything()
 def test_git_add_and_commit_everything_with_dotgit(self, tmpdir):
     """Check Git.add_and_commit_everything() with a nested `.git` file that points nowhere."""
     root = str(tmpdir)
     # a .git file somewhere inside the tree must not make adding fail
     subprocess.check_output(['git', 'init', root], universal_newlines=True)
     nested = os.path.join(root, 'foo')
     os.makedirs(nested)
     with open(os.path.join(nested, '.git'), 'w') as gitfile:
         gitfile.write('gitdir: /this/doesnt/exist/hehehe')
     # git needs at least one regular file to commit; create it empty
     open(os.path.join(nested, 'foo'), 'w').close()
     Git.create_git(root).add_and_commit_everything()
def get_manifest_file_from_git_repo(git_repo_url):
    """Clone a git repository and return an open handle to its root pom.xml.

    :param git_repo_url: URL of the repository to clone
    :return: open file object for ``pom.xml`` in the repository root, or
        None when no such file is found; closing it is left to the caller
    :raises Exception: whatever ``Git.clone`` raises (printed, then re-raised)
    """
    with TemporaryDirectory() as workdir:
        try:
            # Clone into the private temporary directory created above rather
            # than the shared, hard-coded /tmp/, so that concurrent runs do
            # not collide and the checkout is cleaned up afterwards.
            repo = Git.clone(url=git_repo_url, path=workdir)
        except Exception as e:
            print("Exception %r" % e)
            raise

        with cwd(repo.repo_path):
            if peek(Path.cwd().glob("pom.xml")):
                pom_path = '{}/pom.xml'.format(Path.cwd())
                print(pom_path)
                # NOTE(review): the handle is returned open while the checkout
                # is removed on exit; this relies on POSIX keeping an unlinked
                # open file readable - confirm callers only run there.
                return open(pom_path)
    return None
Exemplo n.º 12
0
    def fetch_releases(self, package):
        """Fetch package releases versions.

        The package name is turned into a ``git://`` URL and the first field
        that ``Git.ls_remote`` reports for ``HEAD`` is returned as the only
        version.

        :param package: name with at least three ``/``-separated parts
            (``host/org/project``); extra parts are ignored
        :return: tuple ``(package, [version])``
        :raises ValueError: if the package is empty, has fewer than three
            parts, or nothing is reported for HEAD
        """
        if not package:
            raise ValueError('package not specified')

        parts = package.split("/")[:3]
        if len(parts) != 3:
            # same exception type as the former tuple-unpacking failure, but
            # with a message that names the offending package
            raise ValueError(
                "Package {} is invalid git repository".format(package))

        gh_host, gh_org, gh_proj = parts
        repo_url = 'git://{gh_host}/{gh_org}/{gh_proj}.git'.format(
            gh_host=gh_host, gh_org=gh_org, gh_proj=gh_proj)
        output = Git.ls_remote(repo_url, args=['-q'], refs=['HEAD'])
        # An empty listing (or a blank first line) means there is no version
        # to report; raise ValueError instead of an IndexError.
        fields = output[0].split() if output else []
        if not fields:
            raise ValueError(
                "Package {} does not have associated versions".format(package))

        return package, [fields[0]]
    def extract_dependencies(github_repo, github_sha):
        """Produce the maven dependency tree of a repository at a given commit.

        Currently assuming repository is maven repository.

        :param github_repo: repository url
        :param github_sha: commit hash the clone is hard-reset to
        :return: result of parsing the lines of the generated
            ``dependency-tree.txt``
        :raises TaskError: when the maven command exits non-zero or the
            output file is missing; carries the command output
        """
        with TemporaryDirectory() as checkout_dir:
            cloned = Git.clone(url=github_repo, path=checkout_dir, timeout=3600)
            cloned.reset(revision=github_sha, hard=True)
            with cwd(cloned.repo_path):
                tree_file = Path.cwd() / "dependency-tree.txt"
                mvn_args = [
                    "mvn",
                    "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
                    "-DoutputType=dot",
                    "-DoutputFile={filename}".format(filename=tree_file),
                    "-DappendOutput=true",
                ]
                status, output, _ = TimedCommand(mvn_args).run(timeout=3600)
                if status == 0 and tree_file.is_file():
                    with tree_file.open() as handle:
                        return GithubDependencyTreeTask.parse_maven_dependency_tree(
                            handle.readlines())
                # all errors are in stdout, not stderr
                raise TaskError(output)
Exemplo n.º 14
0
    def extract_dependencies(github_repo, github_sha=None, user_flow=False):
        """Collect dependency information for a repository.

        The repository is cloned, optionally hard-reset to a commit, and the
        manifest found in its root decides which extractor runs (maven, npm,
        python or Go).

        :param github_repo: repository url
        :param github_sha: commit hash; the reset is skipped when it is None
        :param user_flow: to indicate if user flow is invoked
        :return: set of direct (and indirect) dependencies, or None when none
            of the known manifest files is found
        """
        def _in_root(filename):
            # peek() at the glob for `filename` in the current working directory
            return peek(Path.cwd().glob(filename))

        with TemporaryDirectory() as checkout_dir:
            cloned = Git.clone(url=github_repo, path=checkout_dir, timeout=3600)
            if github_sha is not None:
                cloned.reset(revision=github_sha, hard=True)
            with cwd(cloned.repo_path):
                # TODO: Make this task also work for files not present in root directory.

                # package-lock.json is turned into npm-shrinkwrap.json first
                GithubDependencyTreeTask.change_package_lock_to_shrinkwrap()

                if _in_root("pom.xml"):
                    # user flow is only called for maven, so only maven gets the flag
                    return GithubDependencyTreeTask.get_maven_dependencies(
                        user_flow)
                if _in_root("npm-shrinkwrap.json") or _in_root("package.json"):
                    return GithubDependencyTreeTask.get_npm_dependencies(
                        cloned.repo_path)
                if _in_root("requirements.txt"):
                    return GithubDependencyTreeTask.get_python_dependencies(
                        cloned.repo_path)
                if _in_root("glide.lock"):
                    return GithubDependencyTreeTask.get_go_glide_dependencies(
                        cloned.repo_path)
                if _in_root("Gopkg.lock"):
                    return GithubDependencyTreeTask.get_go_pkg_dependencies()

                return None