def muck_clean_all(args):
  ''' `muck clean-all` command. '''
  if args:
    failF('muck clean-all error: clean-all command takes no arguments; use clean to remove individual products.')
  remove_dir_contents(build_dir)
def source_candidate(ctx, target_path, src_dir, prod_name):
  src_dir_names = list_dir_filtered(src_dir or '.', cache=ctx.dir_names)
  candidates = list(filter_source_names(src_dir_names, prod_name))
  if len(candidates) == 1:
    return candidates[0]
  # error.
  deps = ', '.join(sorted(ctx.dependents[target_path])) or target_path
  if len(candidates) == 0:
    failF(deps, 'no source candidates matching `{}`', target_path)
  else:
    failF(deps, 'multiple source candidates matching `{}`: {}', target_path, candidates)
def muck_clean(ctx, args):
  ''' `muck clean` command. '''
  if not args:
    failF('muck clean error: clean command takes specific target arguments; use clean-all to remove all products.')
  for target in args:
    if not ctx.db.contains_record(target_path=target):
      errFL('muck clean note: {}: skipping unknown target.', target)
      continue
    prod_path = product_path_for_target(target)
    remove_file_if_exists(prod_path)
    ctx.db.delete_record(target_path=target)
def check_product_not_modified(ctx, target_path, actual_path, size, mtime, old):
  # existing product should not have been modified since the record was stored.
  # if the size changed then it was definitely modified.
  # otherwise, if the mtime is unchanged, assume that the file is ok, for speed.
  # if the mtime changed, check the hash;
  # the user might have made an accidental edit and then reverted it,
  # and we would rather compute the hash than report a false problem.
  if size != old.size or (mtime != old.mtime and hash_for_path(actual_path) != old.hash):
    ctx.dbgF(target_path, 'size: {} -> {}; mtime: {} -> {}', old.size, size, old.mtime, mtime)
    # TODO: change language depending on whether product is derived from a patch?
    failF(target_path, 'existing product has changed; did you mean to update a patch?\n'
      '  Otherwise, save your changes if necessary and then `muck clean {}`.', target_path)
def pat_dependencies(src_path, src_file, dir_names):
  ''' Return a list of dependencies; `dir_names` is an ignored parameter provided by muck.
  A .pat file always has a single dependency: the source file it patches. '''
  version_line = src_file.readline() # skip the version line.
  orig_line = src_file.readline()
  orig_path = orig_line.strip()
  if not orig_path:
    failF('pat_dependencies: {}:2: line specifying original path is missing or empty.', src_path)
  return [orig_path]
def hash_for_path(path: str, max_chunks=sys.maxsize) -> bytes:
  ''' Return a hash digest for the contents of the file at the given path. '''
  try:
    f = open(path, 'rb')
  except IsADirectoryError:
    failF(path, 'expected a file but found a directory')
  h = sha256()
  # a quick timing experiment suggested that chunk sizes larger than this are not faster.
  chunk_size = 1 << 16
  for i in range(max_chunks):
    chunk = f.read(chunk_size)
    if not chunk: break
    h.update(chunk)
  return h.digest()
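
# Illustrative usage sketch (not part of muck): hash_for_path backs the change-detection
# logic in check_product_not_modified and update_product_with_tmp; a file is treated as
# unchanged only when its current digest equals the recorded one. `old_hash` here stands
# for a hypothetical previously recorded digest.
def file_content_changed(path: str, old_hash: bytes) -> bool:
  return hash_for_path(path) != old_hash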
def update_product_with_tmp(ctx: Ctx, src: str, tmp_path: str):
  product_path, ext = split_stem_ext(tmp_path)
  if ext not in (out_ext, tmp_ext):
    failF(tmp_path, 'product output path has unexpected extension: {!r}', ext)
  if not is_product_path(product_path):
    failF(product_path, 'product path is not in build dir.')
  target_path = product_path[len(build_dir_slash):]
  size, mtime, old = calc_size_mtime_old(ctx, target_path, tmp_path)
  file_hash = hash_for_path(tmp_path)
  is_changed = (size != old.size or file_hash != old.hash)
  if is_changed:
    # delete metadata if it exists, just before overwrite, in case muck fails before update.
    ctx.db.delete_record(target_path=target_path)
  # move regardless; if not changed, this just cleans up the identical tmp file.
  move_file(tmp_path, product_path, overwrite=True)
  noteF(target_path, 'product {}; {}.', 'changed' if is_changed else 'did not change', format_byte_count(size))
  return update_deps_and_record(ctx, target_path, product_path,
    is_changed=is_changed, size=size, mtime=mtime, file_hash=file_hash, src=src, old=old)
def update_dependency(ctx: Ctx, target_path: str, dependent: Optional[str], force=False) -> bool:
  ''' returns is_changed. '''
  target_ext = path_ext(target_path)
  if not target_path.strip():
    failF(repr(target_path), 'invalid target name.')
  if target_path in reserved_names:
    failF(target_path, 'target name is reserved; please rename the target.')
  if target_ext in reserved_exts:
    failF(target_path, 'target name has reserved extension; please rename the target.')
  if dependent is not None:
    ctx.dependents[target_path].add(dependent)
  try: # if in ctx.statuses, this path has already been visited on this run.
    status = ctx.statuses[target_path]
    if status is Ellipsis: # recursion sentinel.
      involved_paths = sorted(path for path, status in ctx.statuses.items() if status is Ellipsis)
      failF(target_path, 'target has circular dependency; involved paths:\n  {}',
        '\n  '.join(involved_paths))
    return status
  except KeyError: pass
  ctx.statuses[target_path] = Ellipsis # recursion sentinel is replaced before return.
  ctx.dbgF(target_path, 'examining... (dependent={})', dependent)
  is_product = not path_exists(target_path)
  actual_path = product_path_for_target(target_path) if is_product else target_path
  size, mtime, old = calc_size_mtime_old(ctx, target_path, actual_path)
  has_old_file = (mtime > 0)
  has_old_record = not is_empty_record(old)
  is_changed = force or (not has_old_file) or (not has_old_record)
  if has_old_record:
    old_is_product = (old.src is not None)
    if is_product != old_is_product: # nature of the target changed.
      noteF(target_path, 'target is {} a product.', 'now' if is_product else 'no longer')
      is_changed = True
    if not has_old_file and target_ext: # product was deleted and is not a phony target.
      noteF(target_path, 'old product was deleted.')
  if is_product:
    if has_old_file and has_old_record:
      check_product_not_modified(ctx, target_path, actual_path, size=size, mtime=mtime, old=old)
    return update_product(ctx, target_path, actual_path, is_changed=is_changed, size=size, mtime=mtime, old=old)
  else:
    return update_non_product(ctx, target_path, is_changed=is_changed, size=size, mtime=mtime, old=old)
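
# Minimal standalone sketch (an assumed illustration, not muck code) of the Ellipsis
# recursion-sentinel pattern that update_dependency uses above: a node is marked
# in-progress with a sentinel before its dependencies are visited, so re-entering it
# mid-visit reveals a dependency cycle.
def visit(node, deps, statuses):
  if statuses.get(node) is Ellipsis:
    raise ValueError('circular dependency involving: {}'.format(node))
  if node in statuses: # already visited on this run.
    return statuses[node]
  statuses[node] = Ellipsis # recursion sentinel; replaced before return.
  result = any(visit(d, deps, statuses) for d in deps.get(node, ()))
  statuses[node] = result
  return result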
def __init__(self, path):
  self.conn = connect(path)
  self.conn.isolation_level = None # autocommit mode.
  try:
    self.run('SELECT COUNT(*) FROM sqlite_master') # dummy query to check file integrity.
  except DatabaseError as e:
    if e.args[0] == 'file is encrypted or is not a database':
      failF('muck error: database is outdated or corrupt; run `muck clean-all`.')
    else: raise
  self.run('''
  CREATE TABLE IF NOT EXISTS targets (
    id INTEGER PRIMARY KEY,
    path TEXT,
    size INT,
    mtime REAL,
    hash BLOB,
    src TEXT,
    deps BLOB
  )''')
  self.run('CREATE UNIQUE INDEX IF NOT EXISTS target_paths ON targets(path)')
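
# Hedged sketch: `contains_record` and `delete_record` (called by muck_clean and
# update_product_with_tmp) are not shown in this listing. Given the `targets` schema
# above, minimal sqlite3 implementations could plausibly look like this; treat them as
# illustrative assumptions rather than the actual muck methods (for instance, the real
# delete_record appears to raise KeyError for missing targets, which this sketch omits).
def contains_record(self, target_path: str) -> bool:
  cursor = self.conn.execute('SELECT 1 FROM targets WHERE path = ?', (target_path,))
  return cursor.fetchone() is not None

def delete_record(self, target_path: str) -> None:
  self.conn.execute('DELETE FROM targets WHERE path = ?', (target_path,))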
def muck_patch(ctx, args):
  if len(args) not in (1, 2):
    failF('''\
muck patch error: patch command takes one or two arguments. usage:

  muck patch [original_target] [target]
    creates a new target by copying either the source or product of the original
    to _build/[target], and then creates an empty [target].pat.

  muck patch [target.pat]
    update the patch file with the diff of the previously specified original and target.
''')
  if len(args) == 2: # create new patch.
    orig_target_path, target_path = args
    if orig_target_path.endswith('.pat'):
      failF('muck patch error: original should not be a patch file: {}', orig_target_path)
    if target_path.endswith('.pat'):
      failF('muck patch error: target should not be a patch file: {}', target_path)
    patch_path = target_path + '.pat'
    if path_exists(patch_path):
      failF('muck patch error: {}: patch already exists.', patch_path)
    update_dependency(ctx, orig_target_path, dependent=None)
    orig_path = actual_path_for_target(orig_target_path)
    prod_path = product_path_for_target(target_path)
    if path_exists(prod_path):
      errFL('muck patch note: product already exists: {}', prod_path)
    else:
      errFL('muck patch note: copying original to product: {} -> {}', orig_path, prod_path)
      copy_file(orig_path, prod_path)
  else: # update existing patch.
    patch_path = args[0]
    if path_ext(patch_path) != '.pat':
      failF('muck patch error: argument does not specify a .pat file: {!r}', patch_path)
    deps = pat_dependencies(patch_path, open(patch_path), {})
    orig_target_path = deps[0]
    update_dependency(ctx, orig_target_path, dependent=None)
    orig_path = actual_path_for_target(orig_target_path)
    target_path = path_stem(patch_path)
    prod_path = product_path_for_target(target_path)
  # update patch (both cases).
  patch_path_tmp = patch_path + tmp_ext
  cmd = ['pat', 'diff', orig_path, prod_path]
  errFL('muck patch note: diffing: `{}`', ' '.join(shlex.quote(w) for w in cmd))
  with open(patch_path_tmp, 'wb') as f:
    code = runC(cmd, out=f)
  move_file(patch_path_tmp, patch_path, overwrite=True)
  if len(args) == 1: # updated existing patch.
    # need to remove or update the target record to avoid the 'did you mean to patch?' safeguard.
    # for now, just delete it to be safe; this makes the target look stale.
    try:
      ctx.db.delete_record(target_path=target_path)
    except KeyError: pass
def patch_failF(start_line_num, fmt, *items):
  failF('{}:{}: ' + fmt, f_patch.name, start_line_num + 1, *items)
def pat_dependencies(src_path, src_file, dir_names):
  failF(src_path, '`pat` is not installed; run `pip install pat-tool`.')
def main_diff(args):
  'diff command entry point.'
  original = args.original
  modified = args.modified
  f_out = args.destination
  min_context = args.min_context
  if min_context < 1:
    failF('min-context value must be positive.')
  if original.name.find('..') != -1:
    failF("original path cannot contain '..': {!r}", original.name)
  o_lines = original.readlines()
  m_lines = modified.readlines()
  if o_lines and not o_lines[-1].endswith('\n'):
    failF('{}:{}: original document is missing final newline (not yet supported).',
      original.name, len(o_lines))
  if m_lines and not m_lines[-1].endswith('\n'):
    failF('{}:{}: modified document is missing final newline (not yet supported).',
      modified.name, len(m_lines))

  def write(line):
    f_out.write(line)

  write('pat v' + pat_version + '\n')
  orig_path_clean = args.original.name
  if orig_path_clean.startswith('_build/'):
    orig_path_clean = orig_path_clean[len('_build/'):]
  write(orig_path_clean + '\n')

  line_indices = defaultdict(set) # maps line contents to line numbers.
  for i, line in enumerate(o_lines):
    line_indices[line].add(i)

  matches = diff_lines(o_lines, m_lines) # returns triples of form (i, j, n); o_lines[i:i+n] == m_lines[j:j+n].
  # matches are monotonically increasing in i and j.
  # the last match is a sentinel with (len(o_lines), len(m_lines), 0);
  # it is the only match with n == 0.
  # for non-sentinel adjacent matches (i, j, n) and (i1, j1, n1),
  # i+n != i1 or j+n != j1, or both.
  # in other words, adjacent matches always describe non-adjacent equal blocks.

  # conceptually, we turn the sequence of matches into a sequence of hunks,
  # where each hunk is a pair of (match, diff).
  # however, actually materializing the diff is not necessary;
  # we simply get the match at the start, or synthesize an empty one as appropriate.
  has_start_symbol = False
  i, j, n = matches[0]
  match_iter = iter(matches)
  if i == 0 and j == 0: # real match at start.
    if n == len(o_lines) and n == len(m_lines): # o and m are identical.
      return # avoids outputting a trailing newline.
    next(match_iter) # advance.
  else: # need a dummy match to start the first hunk.
    i, j, n = (0, 0, 0)
  for i1, j1, n1 in match_iter:
    di = i + n # beginning of diff for o.
    dj = j + n # beginning of diff for m.
    if di == len(o_lines) and dj == len(m_lines):
      break # no diff.
    # calculate how much context we need for this hunk to be unambiguous.
    # this includes the lines subtracted from the original in the calculation.
    # start with the last deleted line of the current diff in o.
    ci = i1 - 1
    matching_indices = line_indices[o_lines[ci]] if (ci >= 0) else set()
    # iterate back through the first line of context.
    for ci in range(ci - 1, i - 1, -1):
      decr_indices = {idx - 1 for idx in matching_indices} # step all candidates backwards.
      curr_indices = line_indices[o_lines[ci]]
      matching_indices = decr_indices.intersection(curr_indices)
      if len(matching_indices) == 1:
        break
    ci = max(i, min(ci, di - min_context))
    if ci == 0 and not has_start_symbol:
      has_start_symbol = True
      write('\n|^\n') # add first separator line and start-of-file symbol line.
    elif i < ci: # not merged with previous hunk; separate with blank line.
      write('\n')
    # output context and diff.
    for o in range(ci, di):
      write('| ' + o_lines[o]) # write context lines.
    for o in range(di, i1):
      write('- ' + o_lines[o]) # remove lines from original.
    for m in range(dj, j1):
      write('+ ' + m_lines[m]) # add lines from modified.
    i = i1
    j = j1
    n = n1
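
# Illustrative example of the output main_diff emits, reconstructed from the write
# calls above (the version number depends on pat_version). For an original with one
# changed line near the start of the file, the patch would look roughly like:
#
#   pat v<pat_version>
#   original.txt
#
#   |^
#   | unchanged context line
#   - old line
#   + new line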
def main_gitdiff(args):
  'gitdiff command entry point.'
  original = args.original
  modified = args.modified
  f_out = args.destination
  min_context = args.min_context
  if original.name.find('..') != -1:
    failF("original path cannot contain '..': {!r}", original.name)
  orig_lines = original.readlines()
  s = SequenceMatcher(None)
  s.set_seq2(orig_lines)
  cmd = 'git diff --exit-code --no-index --no-color --histogram --unified=0'.split()
  cmd.append(original.name)
  cmd.append(modified.name)
  sub = run(cmd, stdout=PIPE, universal_newlines=True)
  if sub.returncode not in (0, 1):
    failF('pat: git diff failed with status code: {}.', sub.returncode)
  f_out.write('pat v0\n')
  groups = group_seq_by_heads(seq=sub.stdout.splitlines(True),
    is_head=lambda line: line.startswith('@@'), headless=HeadlessMode.drop)
  for group in groups:
    head = group[0]
    a = re.search(r'-.*\+', str(head)).group(0)
    start_line_num = int(a[1:a.find(',')])
    # git diff numbers lines from 1; our arrays are indexed from 0.
    # special case: a start line number of 0 from git diff always means lines were added at the beginning.
    if start_line_num == 0:
      hunk_start = start_line_num
      hunk_end = start_line_num
    else:
      start_line_num -= 1
      hunk_start = max(start_line_num - min_context, 0)
      hunk_end = start_line_num
    hunk_start, hunk_end = get_hunk_context(s, hunk_start, hunk_end, orig_lines)
    if hunk_start == 0:
      f_out.write('|^\n')
    for line in orig_lines[hunk_start:hunk_end]:
      f_out.write('| ')
      f_out.write(line)
    for line in group[1:]:
      if line.startswith('-'):
        f_out.write('- ')
        f_out.write(line[1:])
      elif line.startswith('+'):
        f_out.write('+ ')
        f_out.write(line[1:])
      else:
        failF('invalid line in diff output: {!r}', line)
def py_fail(src_path, node, name, msg):
  errF('muck error: {}: ', src_path)
  failF('{}:{}: `{}`: {}.', node.lineno, node.col_offset, name, msg)
def writeup_dependencies(src_path, src_file, dir_names):
  failF(src_path, '`writeup` is not installed; run `pip install writeup-tool`.')
def build_product(ctx, target_path: str, src_path: str, prod_path: str):
  ''' Run a source file, producing zero or more products.
  Return a list of produced product paths, or False if the source is a no-op. '''
  src_ext = path_ext(src_path)
  try:
    build_tool = build_tools[src_ext]
  except KeyError:
    # TODO: fall back to generic .deps file.
    failF(target_path, 'unsupported source file extension: `{}`', src_ext)
  prod_path_out = prod_path + out_ext
  prod_path_tmp = prod_path + tmp_ext
  remove_file_if_exists(prod_path_out)
  remove_file_if_exists(prod_path_tmp)
  if not build_tool:
    noteF(target_path, 'no op.')
    return False # no product.
  prod_dir = path_dir(prod_path)
  make_dirs(prod_dir)
  # extract args from the combination of wilds in the source and the matching target.
  m = match_wilds(target_path_for_source(src_path), target_path)
  if m is None:
    failF(target_path, 'internal error: match failed; src_path: {!r}', src_path)
  argv = [src_path] + list(m.groups())
  cmd = build_tool + argv
  try:
    env_fn = build_tool_env_fns[src_ext]
  except KeyError:
    env = None
  else:
    env = os.environ.copy()
    custom_env = env_fn()
    env.update(custom_env)
  noteF(target_path, 'building: `{}`', ' '.join(shlex.quote(w) for w in cmd))
  out_file = open(prod_path_out, 'wb')
  time_start = time.time()
  code = runC(cmd, env=env, out=out_file)
  time_elapsed = time.time() - time_start
  out_file.close()
  if code != 0:
    failF(target_path, 'build failed with code: {}', code)

  def cleanup_out():
    if file_size(prod_path_out) == 0:
      remove_file(prod_path_out)
    else:
      warnF(target_path, 'wrote data directly to `{}`;\n  ignoring output captured in `{}`',
        prod_path_tmp, prod_path_out)

  manif_path = manifest_path(argv)
  try:
    f = open(manif_path)
  except FileNotFoundError: # no manifest.
    if not path_exists(prod_path_tmp):
      via = 'stdout'
      tmp_paths = [prod_path_out]
    else:
      via = 'tmp'
      tmp_paths = [prod_path_tmp]
    cleanup_out()
  else:
    via = 'manifest'
    tmp_paths = list(line[:-1] for line in f) # strip newlines.
    cleanup_out()
    if ('%' not in prod_path_tmp) and prod_path_tmp not in tmp_paths:
      failF(target_path, 'product does not appear in manifest ({} records): {}',
        len(tmp_paths), manif_path)
    remove_file(manif_path)
  time_msg = '{:0.2f} seconds '.format(time_elapsed) if ctx.report_times else ''
  noteF(target_path, 'finished: {}(via {}).', time_msg, via)
  return tmp_paths
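
# Hedged sketch: `build_tools` (referenced above) maps source extensions to command
# prefixes that are prepended to argv; the entries below are illustrative guesses, not
# muck's actual table. An empty list marks a source that needs no build step, which
# build_product handles via the `if not build_tool` no-op branch.
example_build_tools = {
  '.py': ['python3'], # run python scripts via the interpreter.
  '.sh': ['sh'],      # run shell scripts.
  '.txt': [],         # plain text files require no build step.
}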