def _extract_logs(pool, fnames_to_excavate, project_dir, log_cache_dir, use_cached_logs): """ For each fname in `fnames_to_excavate` under `project_dir`, extract the git logs, writing them to a shadowed file hierarchy under `log_cache_dir`. The logs are extracted from git in parallel using the supplied `pool`. Returns a list of tuples of (fname_relative_to_project_dir, path_to_log_file) where path_to_log_file is the path to a file containing a null-byte delimited series of git logs for the fname. """ rel_and_log_z_fnames = [] log_async_results = [] for fname in fnames_to_excavate: rel_name = util.rel_fname(project_dir, fname) log_z_fname = os.path.join(log_cache_dir, rel_name) rel_and_log_z_fnames.append((rel_name, log_z_fname)) if _should_get_log(log_z_fname, use_cached_logs): log_async_results.append(pool.apply_async(_extract_log, (fname, project_dir))) for res in log_async_results: (rel_name, tmp_file) = res.get(REALLY_LONG_TIME) log_z_fname = os.path.join(log_cache_dir, rel_name) util.ensure_containing_dir_exists(log_z_fname) shutil.copyfile(tmp_file, log_z_fname) log.info("Wrote logs for %s" % rel_name) os.unlink(tmp_file) return rel_and_log_z_fnames
def _extract_log(fname, project_dir):
    """
    Extract the git log for `fname` under `project_dir`, writing the null
    byte delimited entries to a temp file.

    Returns fname as relative to project_dir, and the name of the temp file.
    The temp file is created with delete=False, so the caller is responsible
    for removing it. If extraction fails, the temp file is closed and
    removed here before the exception propagates.

    Intended to be called in a separate process as a target of apply_async.
    """
    rel_name = util.rel_fname(project_dir, fname)
    named_tmp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        # The temp file is entered first so it is closed last, after the
        # log stream has been released, on both success and failure.
        with named_tmp_file, git_log.raw_log_stream(fname) as raw_log_stream:
            shutil.copyfileobj(raw_log_stream, named_tmp_file)
    except BaseException:
        # Nobody will ever learn this file's name, so do not leave it behind.
        os.unlink(named_tmp_file.name)
        raise
    return (rel_name, named_tmp_file.name)
def _interesting_fnames_in_proj(project_dir, interesting_fnames_res, boring_fnames_res):
    """
    Collect the fnames listed by `project.ls(project_dir)` that
    `is_interesting_fname` accepts.

    Each candidate is tested by its name relative to `project_dir`, but the
    fname as listed is what gets returned. Rejected fnames are logged at
    info level.
    """
    kept = []
    for candidate in project.ls(project_dir):
        relative = util.rel_fname(project_dir, candidate)
        if not is_interesting_fname(relative, interesting_fnames_res, boring_fnames_res):
            log.info("Skipping fname %s, not interesting" % candidate)
            continue
        kept.append(candidate)
    return kept
def test_rel_fname_barfs_on_non_rel_file():
    """
    Call rel_fname with a file that does not live under the given directory.

    NOTE(review): the test name implies rel_fname should raise here, but
    nothing in this body asserts it -- presumably a decorator or the test
    runner does; confirm.
    """
    dirname = "/test/this"
    unrelated_fname = "/something/else.txt"
    rel_fname(dirname, unrelated_fname)
def _check_rel_fname(dirname, fname, expected_rel_fname):
    """
    Assert via `eq_` that rel_fname(dirname, fname) equals
    `expected_rel_fname`.
    """
    actual_rel_fname = rel_fname(dirname, fname)
    eq_(expected_rel_fname, actual_rel_fname)