def _build_from_git(cls):
    """Build the database from upstream GitHub and our own.

    We do this before we contribute back to Victims.

    :return: path to the resulting ZIP database; the caller is
        responsible for removing the file when done with it
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Clone VictimsDB and create a ZIP out of it
        with tempfile.TemporaryDirectory() as tf:
            git = Git.clone(VICTIMS_URL, path=tf, single_branch=True)
            victims_zip_path = git.archive(basename='victims', basedir=temp_dir, format='zip')

        # Clone f8a CveDB and create a ZIP out of it
        with tempfile.TemporaryDirectory() as tf:
            git = Git.clone(F8A_CVEDB_URL, path=tf, single_branch=True)
            cvedb_zip_path = git.archive(basename='cvedb', basedir=temp_dir, format='zip')

        # Merge the two ZIP files; both archives are managed by context
        # managers (the original leaked the cvedb ZipFile handle and the
        # per-member file objects from open(n))
        with zipfile.ZipFile(victims_zip_path, 'a') as victims_zip, \
                zipfile.ZipFile(cvedb_zip_path, 'r') as cvedb_zip:
            for member in cvedb_zip.namelist():
                victims_zip.writestr(member, cvedb_zip.read(member))

        # mkstemp() returns an *open* file descriptor along with the path;
        # close it immediately so it does not leak -- we only need the path
        fd, db_path = tempfile.mkstemp(prefix='victims-db-', suffix='.zip')
        os.close(fd)
        try:
            # Copy the uber-ZIP to the target location
            shutil.copyfile(victims_zip_path, db_path)
            return db_path
        except Exception:
            os.remove(db_path)
            raise
def get_files_github_url(self, github_url):
    """Clone the repository from GitHub and retrieve manifest files from it.

    :param github_url: URL of the GitHub repository to scan
    :return: list of dicts with keys ``filename``, ``content`` and ``filepath``
    :raises HTTPError: with status 500 when the repository cannot be read
    """
    manifest_data = []
    repo_suffix = parse_gh_repo(github_url)
    try:
        self.del_temp_files()
        repo_url = urljoin(self.PREFIX_URL, repo_suffix)
        check_valid_repo = get(repo_url)
        if check_valid_repo.status_code == 200:
            # BUG FIX: urljoin() takes only two URL arguments; the third
            # positional parameter is ``allow_fragments``, so the original
            # urljoin(base, suffix, '.git') silently dropped the '.git'
            # suffix. Append it explicitly instead.
            repo_clone_url = urljoin(self.PREFIX_GIT_URL, repo_suffix) + '.git'
            Git.clone(repo_clone_url, self.CLONED_DIR)
            for file_obj in self.get_manifest_files():
                filename = file_obj.get('filename')
                filepath = file_obj.get('filepath')
                with open(filepath, 'rb') as m_file:
                    file_content = m_file.read().decode('utf-8')
                manifest_data.append({
                    "filename": filename,
                    "content": file_content,
                    "filepath": filepath.replace(self.CLONED_DIR, '')
                })
    except Exception:
        raise HTTPError(500, "Error in reading repo from github.")
    finally:
        self.del_temp_files()
    return manifest_data
def update_victims_cve_db_on_s3():
    """Update Victims CVE DB on S3."""
    s3_storage = StoragePool.get_connected_storage('S3VulnDB')
    with TemporaryDirectory() as clone_dir:
        # a shallow clone is enough -- only the current DB content is needed
        Git.clone('https://github.com/victims/victims-cve-db.git',
                  clone_dir, depth="1")
        s3_storage.store_victims_db(clone_dir)
def test_clone(self, tmpdir, url, ok):
    """Test Git.clone()."""
    target = str(tmpdir)
    if not ok:
        # cloning a bad URL must surface as TaskError
        with pytest.raises(TaskError):
            Git.clone(url, target)
        return
    Git.clone(url, target)
    cloned = Path(target)
    assert (cloned / '.git').is_dir()
    assert (cloned / 'README.md').is_file()
def run_mercator_on_git_repo(self, arguments):
    """Clone specified git url and run mercator on it."""
    self._strict_assert(arguments.get('url'))

    with TemporaryDirectory() as workdir:
        repo_url = arguments.get('url')
        # shallow clone -- history is not needed, only the working tree
        repo = Git.clone(repo_url, path=workdir, depth="1")
        metadata = self.run_mercator(arguments, workdir,
                                     keep_path=True,
                                     outermost_only=False,
                                     timeout=900)
        if metadata.get('status', None) != 'success':
            self.log.error('Mercator failed on %s', repo_url)
            return None

        # add some auxiliary information so we can later find the manifest file
        head = repo.rev_parse(['HEAD'])[0]
        prefix_len = len(workdir)
        for detail in metadata['details']:
            # resulting path should look like this:
            # <git-sha1>/path/to/manifest.file
            detail['path'] = head + detail['path'][prefix_len:]
        return metadata
def fetch_releases(self, package):
    """Fetch package releases versions.

    :param package: Go package path, e.g. ``github.com/org/project``
        or ``gopkg.in/project``
    :return: tuple of (package, list of versions)
    :raises ValueError: when the package is missing, malformed, or has
        no associated versions
    """
    if not package:
        raise ValueError('package not specified')

    parts = package.split("/")[:3]
    if len(parts) == 3:
        # this assumes github.com/org/project like structure
        host, org, proj = parts
        repo_url = 'git://{host}/{org}/{proj}.git'.format(host=host, org=org, proj=proj)
    elif len(parts) == 2 and parts[0] == 'gopkg.in':
        # specific to gopkg.in/packages
        host, proj = parts
        repo_url = 'https://{host}/{proj}.git'.format(host=host, proj=proj)
    else:
        raise ValueError(
            "Package {} is invalid git repository".format(package))

    output = Git.ls_remote(repo_url, args=['-q'], refs=['HEAD'])
    # guard against an empty ls-remote result, which previously raised an
    # opaque IndexError on output[0] instead of the documented ValueError
    if not output:
        raise ValueError(
            "Package {} does not have associated versions".format(package))
    version, ref = output[0].split()
    if not version:
        raise ValueError(
            "Package {} does not have associated versions".format(package))
    return package, [version]
def extract_dependencies(github_repo, github_sha):
    """Extract the dependencies information.

    Currently assuming repository is maven/npm/python repository.

    :param github_repo: repository url
    :param github_sha: commit hash
    :return: set of direct (and indirect) dependencies
    """
    with TemporaryDirectory() as workdir:
        repo = Git.clone(url=github_repo, path=workdir, timeout=3600)
        repo.reset(revision=github_sha, hard=True)
        with cwd(repo.repo_path):
            # TODO: Make this task also work for files not present in root directory.

            # First change the package-lock.json to npm-shrinkwrap.json
            GithubDependencyTreeTask.change_package_lock_to_shrinkwrap()

            # dispatch on whichever ecosystem manifest is present in the root
            root = Path.cwd()
            if peek(root.glob("pom.xml")):
                return GithubDependencyTreeTask.get_maven_dependencies()
            if peek(root.glob("npm-shrinkwrap.json")) or peek(root.glob("package.json")):
                return GithubDependencyTreeTask.get_npm_dependencies(repo.repo_path)
            if peek(root.glob("requirements.txt")):
                return GithubDependencyTreeTask.get_python_dependencies(repo.repo_path)
            if peek(root.glob("glide.lock")):
                return GithubDependencyTreeTask.get_go_glide_dependencies(repo.repo_path)
            if peek(root.glob("Gopkg.lock")):
                return GithubDependencyTreeTask.get_go_pkg_dependencies()
            raise TaskError("Please provide maven or npm or "
                            "python or Go repository for scanning!")
def _get_log(url):
    """Clone Git repo and get its log.

    :param url: url to the git repo
    """
    # nice notebook to check at:
    # http://nbviewer.jupyter.org/github/tarmstrong/code-analysis/blob/master/IPythonReviewTime.ipynb
    with TemporaryDirectory() as tmp_dir:
        return Git.clone(url, tmp_dir).log()
def test_git_add_and_commit_everything_with_dotgit(self, tmpdir):
    """Test Git.add_and_commit_everything()."""
    repo_root = Path(str(tmpdir))
    # if there's a .git file somewhere in the archive, we don't want it to fail adding
    subprocess.check_output(['git', 'init', str(repo_root)], universal_newlines=True)
    nested = repo_root / 'foo'
    nested.mkdir(parents=True)
    (nested / '.git').touch()
    # we need at least one normal file for git to commit
    (nested / 'foo').touch()
    git = Git.create_git(str(repo_root))
    git.add_and_commit_everything()
def test_git_add_and_commit_everything_with_dotgit(self, tmpdir):
    """Test Git.add_and_commit_everything() with a stray nested .git file."""
    repo_root = str(tmpdir)
    # if there's a .git file somewhere in the archive, we don't want it to fail adding
    subprocess.check_output(['git', 'init', repo_root], universal_newlines=True)
    nested_dir = os.path.join(repo_root, 'foo')
    os.makedirs(nested_dir)
    with open(os.path.join(nested_dir, '.git'), 'w') as dotgit:
        dotgit.write('gitdir: /this/doesnt/exist/hehehe')
    # we need at least one normal file for git to commit
    with open(os.path.join(nested_dir, 'foo'), 'w'):
        pass
    git = Git.create_git(repo_root)
    git.add_and_commit_everything()
def get_manifest_file_from_git_repo(git_repo_url):
    """Clone a git repository and return an open handle to its pom.xml.

    :param git_repo_url: url of the git repository to inspect
    :return: open file object for the repository's root pom.xml, or None
        when the repository has no pom.xml; caller must close the handle
    """
    with TemporaryDirectory() as workdir:
        try:
            # BUG FIX: the original cloned into '/tmp/' and never used the
            # TemporaryDirectory it created, leaking a clone per call;
            # clone into workdir so cleanup is automatic
            repo = Git.clone(url=git_repo_url, path=workdir)
        except Exception as e:
            print("Exception %r" % e)
            raise
        with cwd(repo.repo_path):
            if peek(Path.cwd().glob("pom.xml")):
                print('{}/pom.xml'.format(Path.cwd()))
                # NOTE(review): the file lives inside workdir, which is
                # removed on return; on POSIX the already-open handle
                # remains readable -- confirm no Windows callers exist
                return open('{}/pom.xml'.format(Path.cwd()))
    return None
def fetch_releases(self, package):
    """Fetch package releases versions.

    :param package: package path, e.g. ``github.com/org/project``
    :return: tuple of (package, list of versions)
    :raises ValueError: when the package is missing, malformed, or has
        no associated versions
    """
    if not package:
        raise ValueError('package not specified')

    parts = package.split("/")[:3]
    # this assumes github.com/org/project like structure; the original
    # unpacked unconditionally and raised an opaque unpacking ValueError
    # for shorter package strings
    if len(parts) != 3:
        raise ValueError(
            "Package {} is invalid git repository".format(package))
    gh_host, gh_org, gh_proj = parts
    repo_url = 'git://{gh_host}/{gh_org}/{gh_proj}.git'.format(
        gh_host=gh_host, gh_org=gh_org, gh_proj=gh_proj)

    output = Git.ls_remote(repo_url, args=['-q'], refs=['HEAD'])
    # an empty ls-remote result previously raised IndexError on output[0]
    if not output:
        raise ValueError(
            "Package {} does not have associated versions".format(package))
    version, ref = output[0].split()
    if not version:
        raise ValueError(
            "Package {} does not have associated versions".format(package))
    return package, [version]
def extract_dependencies(github_repo, github_sha):
    """Extract the dependencies information.

    Currently assuming repository is maven repository.

    :param github_repo: repository url
    :param github_sha: commit hash
    :return: set of direct (and indirect) dependencies
    :raises TaskError: when the maven dependency:tree invocation fails
    """
    with TemporaryDirectory() as workdir:
        repo = Git.clone(url=github_repo, path=workdir, timeout=3600)
        repo.reset(revision=github_sha, hard=True)
        with cwd(repo.repo_path):
            output_file = Path.cwd() / "dependency-tree.txt"
            # BUG FIX: the -DoutputFile format string had no '{filename}'
            # placeholder, so .format() was a no-op and maven wrote the
            # tree to a file with a literal bogus name -- the
            # output_file.is_file() check below then always failed
            cmd = ["mvn",
                   "org.apache.maven.plugins:maven-dependency-plugin:3.0.2:tree",
                   "-DoutputType=dot",
                   "-DoutputFile={filename}".format(filename=output_file),
                   "-DappendOutput=true"]
            timed_cmd = TimedCommand(cmd)
            status, output, _ = timed_cmd.run(timeout=3600)
            if status != 0 or not output_file.is_file():
                # all errors are in stdout, not stderr
                raise TaskError(output)
            with output_file.open() as f:
                return GithubDependencyTreeTask.parse_maven_dependency_tree(f.readlines())
def extract_dependencies(github_repo, github_sha=None, user_flow=False):
    """Extract the dependencies information.

    Currently assuming repository is maven/npm/python repository.

    :param user_flow: to indicate if user flow is invoked
    :param github_repo: repository url
    :param github_sha: commit hash
    :return: set of direct (and indirect) dependencies
    """
    with TemporaryDirectory() as workdir:
        repo = Git.clone(url=github_repo, path=workdir, timeout=3600)
        if github_sha is not None:
            repo.reset(revision=github_sha, hard=True)
        with cwd(repo.repo_path):
            # TODO: Make this task also work for files not present in root directory.

            # First change the package-lock.json to npm-shrinkwrap.json
            GithubDependencyTreeTask.change_package_lock_to_shrinkwrap()

            def present(*patterns):
                # true when any of the glob patterns matches in the repo root
                return any(peek(Path.cwd().glob(p)) for p in patterns)

            # Since user flow is only called for maven, we pass this flag only to maven
            if present("pom.xml"):
                return GithubDependencyTreeTask.get_maven_dependencies(user_flow)
            if present("npm-shrinkwrap.json", "package.json"):
                return GithubDependencyTreeTask.get_npm_dependencies(repo.repo_path)
            if present("requirements.txt"):
                return GithubDependencyTreeTask.get_python_dependencies(repo.repo_path)
            if present("glide.lock"):
                return GithubDependencyTreeTask.get_go_glide_dependencies(repo.repo_path)
            if present("Gopkg.lock"):
                return GithubDependencyTreeTask.get_go_pkg_dependencies()
            return None