Exemplo n.º 1
0
def get_duration():
    """Check ``u.get_duration_string`` on three representative spans.

    Each case starts at 0 and ends at the given ``end_time``; the rendered
    string is logged and then compared with the expected text.
    """
    u.log_print("Test string.get_duration", dashes=100)
    # (end_time passed to get_duration_string, expected rendering)
    expectations = (
        (0.35, "350 ms"),
        (5.369, "5.3 s"),
        (150, "2 minutes and 30 seconds"),
    )
    for end_time, expected in expectations:
        rendered = u.get_duration_string(0, end_time=end_time)
        u.log(rendered)
        assert rendered == expected
    u.log_print()
Exemplo n.º 2
0
def left_join_files(lpath='', rpath='', out='', debug=False):
    """Join two files (lpath and rpath) on the first column of each file.

    When ``lpath`` or ``rpath`` is given, the globals are re-initialised and
    both arrays are loaded from those paths. Otherwise only the right array
    is loaded, from ``gl.OUT_SQL``, and whatever ``gl.ar_in`` already holds
    is used as the left array. The joined result (``gl.out_array``) is saved
    to ``out``, or to ``gl.OUT_PATH`` when ``out`` is empty.

    Setting ``debug`` switches ``gl.DEBUG_JOIN`` on.
    """
    from .init import init_globals
    from .join import left_join_arrays

    u.log("[rl] left_join_files: start")
    started_at = time()
    if debug:
        gl.DEBUG_JOIN = True

    if not (lpath or rpath):
        # No explicit paths: only the right-hand side has to be read.
        u.log("Loading right arrays...")
        right_array = u.load_csv(gl.OUT_SQL)
        u.log("Right array loaded")
    else:
        init_globals()
        u.log(f"Loading arrays from '{lpath}' and '{rpath}'...")
        gl.ar_in = u.load_csv(lpath)
        right_array = u.load_csv(rpath)
        u.log("Arrays loaded")
        u.log_print('|')

    left_join_arrays(gl.ar_in, right_array)

    target = out if out else gl.OUT_PATH
    u.log("Saving output file...")
    u.save_csv(gl.out_array, target)
    u.log(f"Output file saved in {target}")

    elapsed = u.get_duration_string(started_at)
    u.log(f"[rl] left_join_files: end ({elapsed})")
    u.log_print('|')
Exemplo n.º 3
0
def finish(out_path, prompt, nb, start_time):
    """Log the sort summary and write out the duplicate lists, if any.

    Pure duplicates (``gl.dup_list``) are saved to a file named from
    ``gl.OUT_DUP_FILE``, ``nb`` (left out when it equals 0) and
    ``gl.FILE_TYPE``. Key duplicates (``gl.dup_key_list``) are passed to
    ``prompt_dup_key`` when ``prompt`` is truthy, and saved to
    ``gl.OUT_DUP_KEY_FILE`` otherwise.
    """
    key_dup_count = len(gl.dup_key_list)
    pure_dup_count = len(gl.dup_list)
    written = u.big_number(gl.c_tot_out)
    removed = u.big_number(pure_dup_count)
    u.log(
        f"Output file {out_path} successfully generated"
        f" ({written} lines written, {removed} pure duplicates removed)."
    )

    if pure_dup_count > 0:
        dup_path = (
            gl.OUT_DUP_FILE + gl.FILE_TYPE
            if nb == 0
            else gl.OUT_DUP_FILE + str(nb) + gl.FILE_TYPE
        )
        u.save_csv(gl.dup_list, dup_path)
        u.log(f"Pure duplicates list written in {dup_path}")
        u.log_example(gl.dup_list, "pure duplicates")

    if key_dup_count > 0:
        if not prompt:
            u.save_csv(gl.dup_key_list, gl.OUT_DUP_KEY_FILE)
            u.log(f"{key_dup_count} key duplicates found. List written in {gl.OUT_DUP_KEY_FILE}")
        else:
            prompt_dup_key(key_dup_count)

    elapsed = u.get_duration_string(start_time)
    u.log(f"[dq] sort_file: end ({elapsed})")
Exemplo n.º 4
0
def finish_this(start_time):
    """Close ``gl.cnx``, delete ``gl.tmp_file_chunk`` and log the upload totals."""
    gl.cnx.close()
    os.remove(gl.tmp_file_chunk)

    exported = u.big_number(gl.c_main)
    elapsed = u.get_duration_string(start_time)
    for message in (
        f"{exported} lines exported",
        f"[sql] upload: end ({elapsed})",
    ):
        u.log(message)
Exemplo n.º 5
0
def finish_xml(out_path, start_time):
    """Log the end of ``parse_xml`` and open the output file when enabled.

    The file at ``out_path`` is opened only if ``gl.OPEN_OUT_FILE`` is truthy.
    """
    elapsed = u.get_duration_string(start_time)
    written = u.big_number(gl.N_WRITE)
    u.log(f"[toolParseXML] parse_xml: end ({written} lines written in {elapsed})")
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(out_path)
Exemplo n.º 6
0
def finish_dq(start_time):
    """Log the end of ``run_dq``, optionally notify and open the output file.

    A message box is requested when ``gl.MSG_BOX_END`` is truthy, and
    ``gl.paths["out"]`` is opened when ``gl.OPEN_OUT_FILE`` is truthy.
    """
    # With the second argument set, get_duration_string returns a pair:
    # a duration value (handed to msg_box) and its printable form.
    duration_value, elapsed = u.get_duration_string(start_time, True)
    end_message = f"[dq] run_dq: end ({elapsed})"
    u.log(end_message)

    if gl.MSG_BOX_END:
        st.msg_box(end_message, "dq", duration_value, gl.MIN_DUR_TRIGGER)
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(gl.paths["out"])
Exemplo n.º 7
0
def finish(out_path, start_time):
    """Log the filter totals, save ``gl.out_list`` and close the run.

    The output file is opened afterwards when ``gl.OPEN_OUT_FILE`` is truthy.
    """
    u.log("Filtering over")
    read_count = u.big_number(gl.n_r)
    kept_count = u.big_number(gl.n_o)
    u.log(
        f"{read_count} lines read in the input file and"
        f" {kept_count} lines to be written in the output file"
    )

    u.log("Writing output file...")
    u.save_csv(gl.out_list, out_path)
    u.log(f"Output file saved in {out_path}")

    elapsed = u.get_duration_string(start_time)
    u.log(f"[toolFilter] filter: end ({elapsed})")
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(out_path)
Exemplo n.º 8
0
def finish(start_time):
    """Run the optional duplicate check, then log the end of ``reqlist``.

    Duplicates are searched on column 1 of ``gl.OUT_PATH`` when
    ``gl.CHECK_DUP`` is truthy. A message box is requested when
    ``gl.MSG_BOX_END`` is truthy, and the output file is opened when
    ``gl.OPEN_OUT_FILE`` is truthy.
    """
    import partools.utils as u
    import partools.tools as to
    import partools.utils.sTools as st

    if gl.CHECK_DUP:
        u.log("Checking duplicates on the first column of the output file...")
        to.find_dup(gl.OUT_PATH, col=1)
        u.log_print('|')

    # The message box gets the bare message; the log line adds the "[rl] " tag.
    duration_value, elapsed = u.get_duration_string(start_time, True)
    end_message = f"reqlist: end ({elapsed})"
    u.log(f"[rl] {end_message}")

    if gl.MSG_BOX_END:
        st.msg_box(end_message, "rl", duration_value, gl.MIN_DUR_TRIGGER)
    u.log_print()

    if not gl.OPEN_OUT_FILE:
        return
    u.startfile(gl.OUT_PATH)
Exemplo n.º 9
0
def compare_files(in_1, in_2, out_path):
    """Compare two files and report whether they match.

    A header is generated into ``out_path`` from ``in_1`` and
    ``gl.COMPARE_FIELD``, then ``compare_sorted_files`` is run on both inputs.

    Returns True when ``gl.c_diff`` is 0 after the comparison, False
    otherwise.
    """
    from .csf import compare_sorted_files

    u.log("[dq] compare_files: start")
    started_at = time()
    u.gen_header(in_1, gl.COMPARE_FIELD, out_path)
    compare_sorted_files(in_1, in_2, out_path)

    if gl.c_diff != 0:
        diff_count = u.big_number(gl.c_diff)
        u.log(f"{diff_count} differences found")
        files_match = False
    else:
        u.log("Files match")
        files_match = True

    elapsed = u.get_duration_string(started_at)
    u.log(f"[dq] compare_files: end ({elapsed})")

    return files_match
Exemplo n.º 10
0
def finish_sbf(out_path, start_time):
    """Write the lines around the match, or log that nothing was found.

    When ``gl.FOUND`` is truthy, a slice of ``gl.cur_list`` spanning
    ``gl.PRINT_SIZE // 2`` items on each side of index ``gl.c_row - 1`` is
    saved to ``out_path`` (and opened when ``gl.OPEN_OUT_FILE`` is truthy).
    Otherwise an end-of-file message is logged. The elapsed time since
    ``start_time`` is logged in both cases.
    """
    if gl.FOUND:
        half_window = gl.PRINT_SIZE // 2
        hit_index = gl.c_row - 1
        # Clamp the lower bound: a negative start would wrap around the list.
        low_i = max(hit_index - half_window, 0)
        high_i = hit_index + half_window
        u.save_list(gl.cur_list[low_i:high_i], out_path)
        # The message is already interpolated. Running str.format() on it
        # again would raise (or alter the text) whenever out_path contains
        # '{' or '}'.
        u.log(f"Current list written in {out_path}")
        if gl.OPEN_OUT_FILE:
            u.startfile(out_path)
    else:
        bn = u.big_number(gl.c_main)
        s = (f"EOF reached ({bn} lines, {gl.c_list} temporary lists)"
             f", string '{gl.LOOK_FOR}' not found")
        u.log(s)

    dstr = u.get_duration_string(start_time)
    u.log(f"[toolBF] search_big_file: end ({dstr})\n")
Exemplo n.º 11
0
def send_chunk_duration(start):
    """Send the duration of one insert to the main process.

    The first insert may take longer than expected because of cache
    mechanisms, so its duration is not sent. The duration of the second
    insert is stored in ``gl.MD["T"]`` instead, and only if that entry is
    still unset.
    """
    # Nothing to do without gl.MD, or when this is not the second insert.
    if not gl.MD or gl.c_main // gl.NB_MAX_ELT_INSERT != 2:
        return

    # A duration has already been stored.
    if gl.MD["T"]:
        return

    duration_value, elapsed = u.get_duration_string(start, True)
    u.log(f"Sending duration to the main process ({elapsed})...")
    # Never store 0: the entry is tested for truthiness above.
    gl.MD["T"] = 1 if duration_value == 0 else duration_value
Exemplo n.º 12
0
def execute(**kwargs):
    """Execute a SQL script or a PL/SQL procedure on an Oracle DB.

    See README.md for guidance.

    See partools/quickstart/sql_execute.py for examples of use.

    ``kwargs`` are applied to ``gl`` through ``u.init_kwargs``. When
    ``gl.PROC`` is truthy the whole script is executed in one call;
    otherwise it is split on ``';\\n'`` and each command is executed in turn.

    The cursor and the connection are closed even if a statement raises.
    The commit happens only when every statement succeeded, and the
    exception is propagated to the caller.
    """

    u.log('[sql] execute: start')
    start_time = time()
    u.init_kwargs(gl, kwargs)
    init_gl()
    script = get_final_script(gl.SCRIPT_IN)
    cnx = connect()
    try:
        c = cnx.cursor()
        try:
            if gl.PROC:
                u.log("Executing proc:")
                u.log_print(script)
                c.execute(script)
                u.log("Proc executed")
            else:
                command_list = script.split(';\n')
                if command_list[-1]:
                    # The last command has no trailing newline, so its
                    # terminating ';' survived the split.
                    command_list[-1] = command_list[-1].strip(';')
                else:
                    # The script ended with ';\n': drop the empty tail.
                    command_list.pop()
                for command in command_list:
                    u.log("Executing command:")
                    u.log_print(command)
                    c.execute(command)
                    u.log("Command executed")
        finally:
            c.close()
        cnx.commit()
    finally:
        cnx.close()

    dstr = u.get_duration_string(start_time)
    u.log(f"[sql] execute: end ({dstr})")
    u.log_print()