def read_big_file(in_path, **kwargs):
    """Reads a potentially big file and echoes what is read to the log

    The file is consumed through ``f.read_file`` (presumably one line per
    call - confirm in the helper). After every read made inside the loop,
    ``f.check_counter`` decides whether reading goes on or stops.

    See in partools/tools/gl for other parameters (kwargs)

    See partools/quickstart/tools_bf.py for examples of use
    """
    from .init import init_rbf

    u.log("[toolBF] read_big_file: start")
    init_rbf()
    u.init_kwargs(gl, kwargs)

    with open(in_path, 'r', encoding='utf-8', errors='ignore') as in_file:
        # The first read is echoed but not counted in gl.c_read.
        current = f.read_file(in_file)
        u.log_print(current.strip("\n"))

        # An empty string returned by the reader ends the loop.
        while current != "":
            current = f.read_file(in_file)
            u.log_print(current.strip("\n"))
            gl.c_read += 1
            if not f.check_counter(in_file):
                break

    u.log("[toolBF] read_big_file: end\n")
def init(kwargs):
    """Applies kwargs to gl, resets globals and loads the input csv

    The header of gl.IN_PATH is checked with ``u.check_header`` before the
    file is loaded through ``u.load_csv`` into gl.ar_in.
    """
    u.init_kwargs(gl, kwargs)
    init_globals()

    # Validate the input file before spending time loading it.
    u.check_header(gl.IN_PATH)

    loading_msg = f"Loading input array from '{gl.IN_PATH}'..."
    u.log(loading_msg)
    loaded = u.load_csv(gl.IN_PATH)
    gl.ar_in = loaded
    u.log("Input array loaded")
    u.log_print('|')
def init_dq(kwargs):
    """Initialises a run_dq job

    Applies kwargs to gl, then calls ``init_tmp_dir`` and ``set_paths``
    before logging which two input files (gl.paths['in1'] and
    gl.paths['in2']) are about to be processed.
    """
    u.log("[dq] run_dq: start")
    u.init_kwargs(gl, kwargs)
    init_tmp_dir()
    set_paths()

    # gl.paths is read only after set_paths() has run.
    u.log(
        f"run_dq job initialised. Input files {gl.paths['in1']} and {gl.paths['in2']}"
        " are going to be sorted and compared."
    )
    u.log_print('|')
def init(kwargs):
    """Prepares the module globals and opens the database connection

    Applies kwargs to gl, runs ``init_gl``, creates gl.TMP_DIR, resets the
    counters and the data buffer, and stores a new connection and its
    cursor in gl.cnx and gl.c.
    """
    from .connect import connect
    from .init import init_gl

    u.init_kwargs(gl, kwargs)
    init_gl()
    u.mkdirs(gl.TMP_DIR)

    # All progress counters start from zero.
    gl.ref_chunk = gl.c_main = gl.c_chunk = 0

    connection = connect()
    gl.cnx = connection
    gl.c = connection.cursor()
    gl.data = []
def parse_xml(in_path, out_path, **kwargs):
    """Converts a potentially big xml file into csv

    The work is delegated, in order, to ``gen_img_dict`` (reads in_path),
    ``save_img_dict`` (writes to out_path) and ``finish_xml``.

    See in partools/tools/gl for other parameters (kwargs)

    See partools/quickstart/tools_xml.py for examples of use
    """
    from .finish import finish_xml

    u.log("[toolParseXML] parse_xml: start")
    began_at = time()  # handed to finish_xml at the end

    u.init_kwargs(gl, kwargs)
    init_globals()

    gen_img_dict(in_path)
    save_img_dict(out_path)

    finish_xml(out_path, began_at)
def download(**kwargs):
    """Performs multi threaded SQL queries on an Oracle DB

    Steps, in order: reload gl, apply kwargs, ``init``, ``get_query_list``,
    ``recover``, ``process_query_list`` and ``finish``.

    See README.md for guidance

    See partools/quickstart/sql_download.py for examples of use
    """
    u.log('[sql] download: start')

    # Reloading the module puts the globals back to their defaults, so
    # values left over from a previous call do not carry over.
    reload(gl)
    began_at = time()
    u.init_kwargs(gl, kwargs)

    init()
    get_query_list()
    # NOTE(review): presumably resumes an interrupted run - confirm.
    recover()
    process_query_list()

    finish(began_at)
def flt(in_path, out_path, **kwargs):
    """Filters and/or extracts columns from a csv file

    The input is streamed: ``process_header`` consumes the start of the
    file, then every remaining line is handed to ``process_line``.

    See in partools/tools/gl for other parameters (kwargs)

    See partools/quickstart/tools_filter.py for examples of use
    """
    u.log("[toolFilter] filter: start")
    began_at = time()
    u.init_kwargs(gl, kwargs)
    init_globals(in_path)

    u.log(f"Filtering file '{in_path}'...")
    with open(in_path, 'r', encoding='utf-8') as in_file:
        process_header(in_file)
        # Keep calling readline() until it returns '' (end of file).
        for row in iter(in_file.readline, ''):
            process_line(row)

    finish(out_path, began_at)
def search_big_file(in_path, out_path, look_for, **kwargs):
    """Searches a potentially big file

    The file is processed in successive batches: ``f.fill_cur_list`` loads
    the next batch and ``f.search_cur_list`` inspects it. The loop ends as
    soon as the search reports a truthy result or gl.EOF is set.

    See in partools/tools/gl for other parameters (kwargs)

    See partools/quickstart/tools_bf.py for examples of use
    """
    from .init import init_sbf
    from .finish import finish_sbf

    u.log("[toolBF] search_big_file: start")
    began_at = time()

    # init_sbf runs before the kwargs are applied, as in the original order.
    init_sbf(in_path, look_for)
    u.init_kwargs(gl, kwargs)
    u.log(gl.s_init)

    with open(in_path, 'r', encoding='utf-8', errors='ignore') as in_file:
        while not gl.EOF:
            f.fill_cur_list(in_file)
            stop_now = f.search_cur_list()
            if stop_now:
                break

    finish_sbf(out_path, began_at)
def execute(**kwargs):
    """Executes a SQL script or a PL/SQL procedure on an Oracle DB

    The script is built by ``get_final_script(gl.SCRIPT_IN)``. When gl.PROC
    is truthy it is executed as one statement; otherwise it is split on
    ';\\n' and each command is executed in turn.

    The cursor and the connection are always closed, even when a statement
    raises. The commit is only issued when every statement succeeded; the
    exception, if any, propagates to the caller unchanged.

    See README.md for guidance

    See partools/quickstart/sql_execute.py for examples of use
    """
    u.log('[sql] execute: start')
    start_time = time()
    u.init_kwargs(gl, kwargs)
    init_gl()
    script = get_final_script(gl.SCRIPT_IN)

    cnx = connect()
    try:
        c = cnx.cursor()
        try:
            if gl.PROC:
                u.log("Executing proc:")
                u.log_print(script)
                c.execute(script)
                u.log("Proc executed")
            else:
                command_list = script.split(';\n')
                if command_list[-1]:
                    # The last command has no ';\n' after it: drop its
                    # own ';' so it is not sent to the database.
                    command_list[-1] = command_list[-1].strip(';')
                else:
                    # The script ended with ';\n', leaving an empty tail.
                    command_list = command_list[:-1]
                for command in command_list:
                    u.log("Executing command:")
                    u.log_print(command)
                    c.execute(command)
                    u.log("Command executed")
        finally:
            # Release the cursor whether or not a statement failed.
            c.close()
        # Only reached when no statement raised.
        cnx.commit()
    finally:
        # On failure the connection is closed without a commit.
        cnx.close()

    dstr = u.get_duration_string(start_time)
    u.log(f"[sql] execute: end ({dstr})")
    u.log_print()
def split_file(in_path, out_dir='', **kwargs):
    """Splits a file into multiple files (e.g. if it is too big to be
    opened with an app such as Excel)

    Output files are named '<file_dir><file_name>_<N>.<ext>', where the
    three parts come from ``parse_in_path`` and N is gl.N_OUT, incremented
    before each output file is generated.

    See in partools/tools/gl for other parameters (kwargs)

    See partools/quickstart/tools_filter.py for examples of use
    """
    u.log("[toolSplit] split_file: start")
    u.init_kwargs(gl, kwargs)
    init_globals()

    file_dir, file_name, ext = parse_in_path(in_path, out_dir)
    gl.header = u.get_header(in_path)

    with open(in_path, 'r', encoding='utf-8') as in_file:
        # gen_split_out returns a falsy value once nothing is left to split.
        keep_going = True
        while keep_going:
            gl.N_OUT += 1
            out_path = f'{file_dir}{file_name}_{gl.N_OUT}.{ext}'
            keep_going = gen_split_out(out_path, in_file)

    u.log("[toolSplit] split_file: end")
    u.log_print()