def file_match(in1, in2, del_dup=False, err=True, out_path=''):
    """Compares two files and outputs the diff if the files don't match.

    Note that the files are sorted before comparison.
    (more generic than run_dq but doesn't work for big files)

    - del_dup: if true, duplicates are deleted before comparison
    - err: if True, an exception is raised when the files don't match
    - out_path: specifies an output path for file comparison different from default
    """
    u.log("[dq] file_match: start")
    if not out_path:
        out_path = u.g.dirs['OUT'] + 'file_match_out.csv'
    s = f"Comparing files '{in1}' and '{in2}'..."
    u.log(s)
    l1, l2 = u.load_txt(in1), u.load_txt(in2)
    # Line order must not matter for the comparison, so sort both sides.
    l1.sort()
    l2.sort()
    if del_dup:
        l1, l2 = del_dup_list(l1), del_dup_list(l2)
    res = l1 == l2
    s = "Files match" if res else "Files don't match"
    u.log(s)
    if not res:
        f.diff_list(l1, l2, out_path)
        if err:
            u.startfile(out_path)
            # Explicit raise instead of a bare `assert`: asserts are stripped
            # under `python -O`, which would silently disable this check.
            # AssertionError is kept so existing callers' handlers still work.
            raise AssertionError(f"Files '{in1}' and '{in2}' don't match")
    u.log("[dq] file_match: end")
    u.log_print()
def finish_del_dup(out_list, out_path, open_out):
    """Persist the deduplicated list and optionally open the saved file.

    - out_list: lines to write (duplicates already removed)
    - out_path: destination path for the saved list
    - open_out: if True, the output file is opened once saved
    """
    u.log(f"Saving list without duplicates in '{out_path}'...")
    u.save_list(out_list, out_path)
    line_count = u.big_number(len(out_list))
    u.log(f"List saved, it has {line_count} lines")
    if open_out:
        u.startfile(out_path)
def finish_xml(out_path, start_time):
    """Log the end of parse_xml with line count and duration, then
    optionally open the output file.
    """
    duration = u.get_duration_string(start_time)
    written = u.big_number(gl.N_WRITE)
    u.log(f"[toolParseXML] parse_xml: end ({written} lines written in {duration})")
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(out_path)
def finish_dq(start_time):
    """Log the end of run_dq; optionally show a message box and open the
    output file (both driven by `gl` flags).
    """
    dms, dstr = u.get_duration_string(start_time, True)
    end_msg = f"[dq] run_dq: end ({dstr})"
    u.log(end_msg)
    if gl.MSG_BOX_END:
        st.msg_box(end_msg, "dq", dms, gl.MIN_DUR_TRIGGER)
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(gl.paths["out"])
def extract_doc(in_dirs, out_path):
    """Extracts all Python doc ('#' comments included) present in the
    'in_dirs' directories (can be useful for spell check).

    - in_dirs: directories to scan for documentation
    - out_path: path where the extracted doc is saved (then opened)
    """
    collected = []
    # extract_doc_from_dir appends into `collected` in place.
    for cur_dir in in_dirs:
        extract_doc_from_dir(cur_dir, collected)
    u.save_list(collected, out_path)
    u.startfile(out_path)
def shuffle_file(in_path, out_path, open_out=False):
    """Shuffles the line order of a file using the native random package.

    A header line, if detected, is kept in place: only the data lines are
    shuffled. Fixes the headerless case, where the original either hit an
    unbound `header` or skipped the shuffle entirely (depending on nesting).

    - in_path: path of the file to shuffle
    - out_path: path where the shuffled file is saved
    - open_out: if True, the output file is opened once saved
    """
    u.log("[toolShuf] shuffle_file: start")
    cur_list = u.load_txt(in_path)
    header = None
    if u.has_header(cur_list):
        # Keep the header out of the shuffle so it stays on the first line.
        header = cur_list[0]
        cur_list = cur_list[1:]
    shuffle(cur_list)
    if header is not None:
        cur_list = [header] + cur_list
    u.save_list(cur_list, out_path)
    u.log(f"Shuffled file saved in {out_path}")
    if open_out:
        u.startfile(out_path)
    u.log("[toolShuf] shuffle_file: end")
def finish_find_dup(dup_list, out_path, open_out):
    """Report duplicates found, save them to a CSV file and optionally
    open it. Returns early when there are none.
    """
    if not dup_list:
        u.log("No duplicates found")
        return
    dup_count = u.big_number(len(dup_list))
    u.log(f"{dup_count} duplicates found")
    u.log_example(dup_list)
    u.save_csv(dup_list, out_path)
    u.log(f"List of duplicates saved in {out_path}")
    if open_out:
        u.startfile(out_path)
def finish(out_path, start_time):
    """Log filter statistics, write the filtered output file and
    optionally open it (driven by `gl` state).
    """
    u.log("Filtering over")
    read_count = u.big_number(gl.n_r)
    out_count = u.big_number(gl.n_o)
    u.log(
        f"{read_count} lines read in the input file and"
        f" {out_count} lines to be written in the output file"
    )
    u.log("Writing output file...")
    u.save_csv(gl.out_list, out_path)
    u.log(f"Output file saved in {out_path}")
    duration = u.get_duration_string(start_time)
    u.log(f"[toolFilter] filter: end ({duration})")
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(out_path)
def finish(start_time):
    """Optionally run a duplicate check on the output file, then log the
    end of reqlist; message box and file opening are driven by `gl` flags.
    """
    # Local imports kept as in the original (likely to avoid import cycles).
    import partools.utils as u
    import partools.tools as to
    import partools.utils.sTools as st

    if gl.CHECK_DUP:
        u.log("Checking duplicates on the first column of the output file...")
        to.find_dup(gl.OUT_PATH, col=1)
        u.log_print('|')
    dms, dstr = u.get_duration_string(start_time, True)
    end_msg = f"reqlist: end ({dstr})"
    u.log("[rl] " + end_msg)
    if gl.MSG_BOX_END:
        st.msg_box(end_msg, "rl", dms, gl.MIN_DUR_TRIGGER)
    u.log_print()
    if gl.OPEN_OUT_FILE:
        u.startfile(gl.OUT_PATH)
def finish_sbf(out_path, start_time):
    """Wrap up search_big_file: dump the context window around the match,
    or report that EOF was reached without finding the string.

    - out_path: path where the context window around the match is written
    - start_time: start timestamp used to compute the total duration
    """
    if gl.FOUND:
        # Window of PRINT_SIZE lines centered on the matching row,
        # clamped at the start of the list.
        low_i = gl.c_row - 1 - gl.PRINT_SIZE // 2
        if low_i < 0:
            low_i = 0
        high_i = gl.c_row - 1 + gl.PRINT_SIZE // 2
        u.save_list(gl.cur_list[low_i:high_i], out_path)
        # Bug fix: the original logged `s.format()` — calling .format() on an
        # already-evaluated f-string is a no-op at best, and raises
        # (KeyError/IndexError/ValueError) if out_path contains '{' or '}'.
        u.log(f"Current list written in {out_path}")
        if gl.OPEN_OUT_FILE:
            u.startfile(out_path)
    else:
        bn = u.big_number(gl.c_main)
        u.log(
            f"EOF reached ({bn} lines, {gl.c_list} temporary lists)"
            f", string '{gl.LOOK_FOR}' not found"
        )
    dstr = u.get_duration_string(start_time)
    u.log(f"[toolBF] search_big_file: end ({dstr})\n")