def esult_frames(data_frame, search_column, real_list, file_name):
    """
    For opening only rows with results that match main window search criteria

    :param data_frame: New frame created from file
    :param search_column: Column to search -from main window
    :param real_list: Stripped list of item(s) to search -from main window
    :param file_name: Current Files Name
    :return: Results from File (empty DataFrame when the search produced nothing)
    """
    new_output = []
    new_field = GenFuncs.strip_dir(file_name)
    # criteria_by_column branches on whether it received one item or a list
    if not isinstance(real_list, str):
        list_str_var = 2
    else:
        list_str_var = 1
    new_output.append(
        SearchDataFrame.criteria_by_column(search_column, real_list, new_field,
                                           list_str_var, data_frame))
    # BUGFIX: the original compared `new_output == [None]` inside a
    # try/except ValueError, relying on pandas raising when a DataFrame is
    # compared to None; when the comparison simply evaluated False,
    # `new_output2` was never bound and the return raised NameError.
    # Test the single result explicitly instead.
    if new_output[0] is None:
        new_output2 = pd.DataFrame({'A': []})
    else:
        new_output2 = pd.concat(new_output, axis=0, sort=False,
                                ignore_index=True)
    return new_output2
def exp_imp_sets(self, root, dir, func=1):
    """
    Button for Exporting/Importing Output Settings

    :param root: Parent Frame
    :param dir: Default Search output directory
    :param func: 1= Import, 0= Export
    """
    allowed_types = [('Text Documents', '.txt')]
    # Pick dialog, file mode and title based on import vs. export.
    if func == 1:
        dialog, mode, caption = filedialog.askopenfilename, 'r', "Select saved file:"
    else:
        dialog, mode, caption = filedialog.asksaveasfile, 'w', "Save as:"
    chosen = dialog(parent=root,
                    initialdir=dir,
                    title=caption,
                    filetypes=allowed_types)
    # Dialog returns a falsy value when the user cancels — do nothing then.
    if chosen:
        GenFuncs.exp_imp_func(chosen, mode)
def open_files(func=1):
    """
    Select and Open Files.

    :param func: ==1 Open Selected files from within a folder .
    :param func: ==2 Open files within Selected Directory.
    """
    # NOTE(review): relies on module-level state (answer, footer, ents,
    # row, thread_busy, li, root, my_filetypes) — confirm these are
    # initialized before this is callable.
    global answer, footer, ents, ents2, row, thread_busy
    # thread_busy is a simple re-entrancy guard so a second click does
    # nothing while an open is already running.
    if not thread_busy:
        thread_busy = True
        inp_opts = GenFuncs.get_inp_opts()
        new_list = []
        loc_answer = []
        if func == 1:
            # User picks individual files.
            files_answer = filedialog.askopenfilenames(parent=footer,
                                                       initialdir=os.getcwd(),
                                                       title="Please select one or more files:",
                                                       filetypes=my_filetypes)
            try:
                new_list, loc_answer = GenFuncs.get_file_list(files_answer, answer, func=1)
            except TypeError:
                # Dialog cancelled / helper returned nothing usable.
                new_list, loc_answer = [], []
        elif func == 2:
            # User picks a directory; optionally filter by filename prefix.
            check_name_temp = messagebox.askyesno("File_Pal_1.2",
                                                  "Do you want to specify the first characters?")
            if check_name_temp:
                name_str = simpledialog.askstring("File_Pal_1.2",
                                                  "First part of name for the files you want to open?",
                                                  parent=root)
            else:
                name_str = ''
            directory = filedialog.askdirectory(parent=root,
                                                initialdir=os.getcwd(),
                                                title="Please select Directory:")
            try:
                new_list, loc_answer = GenFuncs.get_file_list(directory, answer,
                                                              check_name_temp, name_str,
                                                              func=2)
            except TypeError:
                new_list, loc_answer = [], []
        if (len(loc_answer) + len(new_list)) > 0:
            if inp_opts[0]['Main Win Criteria']:
                # Append the main-window search criteria so files are
                # filtered while opening.
                temp_opts = list(inp_opts)
                search_column = (ents[0][1].get()).strip()
                real_list = Split_Entry.split(ents[1][1].get())
                temp_opts.extend((search_column, real_list))
                inp_opts = temp_opts
            if (len(new_list) > 1) and (inp_opts[0]['CPU Cores'] > 1):
                # Parallel open: one worker per configured CPU core.
                pool = Pool(processes=inp_opts[0]['CPU Cores'])
                df_list = pool.map(partial(OpenFile.open_file,
                                           inp_options=inp_opts,
                                           root=row), new_list)
                for i in range(len(new_list)):
                    if not df_list[i][0].empty:
                        frame_class = FileFrame(df_list[i][0], df_list[i][1], df_list[i][2])
                        li.append(frame_class)
                        answer.append(df_list[i][1])
                    else:
                        print(df_list[i][1] + ' didn\'t have the certian input requirements.')
            else:
                # Serial open: process every requested file in order.
                loc_answer.extend(new_list)
                try:
                    for file in loc_answer:
                        if file not in answer:  # skip files already open
                            try:
                                dataframe = OpenFile.open_file(file, inp_opts, row)
                                if not dataframe[0].empty:
                                    frame_class = FileFrame(dataframe[0], dataframe[1], dataframe[2])
                                    li.append(frame_class)
                                    answer.append(file)
                                    row = reset_frame(row, root)
                            except PermissionError as e:
                                print(e)
                                print('This file is currently locked.')
                                row = reset_frame(row, root)
                            except ValueError as e:
                                print(e)
                                row = reset_frame(row, root)
                except KeyboardInterrupt as e:
                    # Allow the user to abort a long batch with Ctrl-C.
                    print(e)
        # Rebuild the footer UI and release the re-entrancy guard.
        footer = reset_frame(footer, root, True)
        thread_busy = False
def ow_frames(input_criteria, opened_files, data_frames, auto_open_var,
              output_type, file_list, root2=False, func=0,
              file_name='default'):
    """
    Search Open Files by Input Criteria and output file

    :param input_criteria: Search Column and Search Item(s)
    :param opened_files: List of opened files with Checkbutton variables
    :param data_frames: list of open files(classes)
    :param auto_open_var: Main window Checkbutton variable for Auto-Open
    :param output_type: type of output - set to xlsx for a while
    :param file_list: Search Order List
    :param root2: parent widget for the progress-bar frame
    :param func: 0/3 = run a search first; other values write
        ``opened_files`` directly as the output frame
    :param file_name: settings-file name forwarded to GenFuncs.get_out_opts
    """
    # NOTE(review): `_test` is read but not defined here — presumably a
    # module-level test flag that suppresses all tkinter work; confirm.
    def get_checked_l():
        # Collect only the files whose Checkbutton is ticked in the UI.
        checked_list = []
        for file in file_list:
            # Iterate through DataFrames using i as index
            ind = file_list.index(file)
            if opened_files[ind][2].get() == 1:
                checked_list.append(file)
        return checked_list

    start = time.time()
    new_output = []  # Search results per DataFrame
    if not _test:
        # Build a small progress-bar frame under root2.
        root = Frame(root2)
        progress = Progressbar(root, orient=HORIZONTAL, length=300,
                               mode='determinate')
        progress.pack(fill=X)
        v = StringVar()
        Label(root, textvariable=v).pack()
        root.pack()
    if func == 0 or func == 3:
        if func == 0:
            # Criteria come from the main-window Entry widgets.
            print('Searching:\n' + input_criteria[1][1].get())
            search_column = (input_criteria[0][1].get()).strip()
            out_d_and_n, zeros_dict, font_type_size, col_width, dec_rules,\
                dec_place = GenFuncs.get_out_opts(input_criteria, search_column,
                                                  output_type)  # Load Settings
        elif func == 3:
            # Criteria are passed in directly as plain values.
            out_d_and_n, zeros_dict, font_type_size, col_width, dec_rules,\
                dec_place = GenFuncs.get_out_opts(input_criteria, input_criteria[0],
                                                  output_type, func=1,
                                                  file_name=file_name)  # Load Settings
        checked_l = get_checked_l()
        for file in checked_l:
            ind = file_list.index(file)
            if not _test:
                # First half of the bar tracks the per-file search phase.
                progress_bar_ind = checked_l.index(file)
                progress['value'] = ((
                    (progress_bar_ind + 1) / len(checked_l)) * 100) / 2
                v.set("Searching : " + GenFuncs.strip_dir(file))
                root.update_idletasks()
            if func != 3:
                results = data_frames[ind].search_col(
                    search_column, input_criteria[1][1].get(),
                    zeros_dict)  # <-need to move
            else:
                results = data_frames[ind].search_col(
                    input_criteria[0], input_criteria[1], zeros_dict)
            try:
                if not results.empty:
                    new_output.append(results)
            except AttributeError:
                # search_col returned a non-DataFrame (e.g. None) — skip it.
                pass
    else:
        # No search requested: caller already provides the output frame.
        new_new_output = opened_files
        output_type = 'xlsx'
        out_d_and_n, zeros_dict, font_type_size, \
            col_width, dec_rules, dec_place = GenFuncs.get_out_opts(
                "", "", output_type, func=1,
                file_name=file_name)  # Load Settings
    output_directory = os.path.dirname(out_d_and_n)
    for FP_out in os.listdir(
            output_directory):  # Clean output folder of past search items
        if FP_out.endswith("FP_out.xlsx"):
            try:
                os.remove(os.path.join(output_directory, FP_out))
            except PermissionError:
                # File still open in Excel — leave it behind.
                pass
    # Build the float format string from the decimal-places setting.
    if dec_place != False:
        dec_var = '%.' + str(dec_place) + 'f'
    else:
        dec_var = "%.2f"
    if not _test:
        v.set("Formatting Output")
        root.update_idletasks()
    try:
        if func == 0 or func == 3:
            try:
                new_new_output = pd.concat(new_output, axis=0, sort=False,
                                           ignore_index=True)
            except:
                # concat raises when new_output is empty — nothing matched.
                print("No results")
                if not _test:
                    progress.destroy()
                    v.set("No results")
                    root.update_idletasks()
                    time.sleep(2)
                    root.destroy()
                return
        #var_file = shelve.open(os.path.join(os.path.expanduser('~'),'var_file'))
        #try:
        #    plug_dicts = var_file['plug_lists']
        #    var_file.close()
        #    for key, value in plug_dicts.items():
        #        if value[0] == 1:
        #            new_new_output = value[1].run(new_new_output)
        #except KeyError:
        #    var_file.close()
        #print('fail retrieve_info')
        cols_index = []
        for col in new_new_output:
            cols_index.append(col)
        if output_type == 'csv':
            new_new_output.to_csv(out_d_and_n, index=False)
        elif output_type == 'xlsx':
            writer_orig = pd.ExcelWriter(out_d_and_n, engine='xlsxwriter')
            new_new_output.to_excel(writer_orig, index=False,
                                    sheet_name='SearchOutput',
                                    float_format=dec_var)
            workbook = writer_orig.book
            worksheet = writer_orig.sheets['SearchOutput']
            size = 10
            # Total formatting rules; drives the second half of the bar.
            f_rule_cnt = len(font_type_size) + len(col_width) + len(dec_rules)
            crnt_rule = 0
            if font_type_size != {}:  # Set Global Font Size / Type
                try:
                    size = int(list(font_type_size.values())[0])
                    if size != False:
                        workbook.formats[0].set_font_size(size)
                    if list(font_type_size.keys())[0] != False:
                        workbook.formats[0].set_font_name(
                            list(font_type_size.keys())[0])
                    if not _test:
                        progress['value'] = ((
                            (crnt_rule / f_rule_cnt) * 100) / 2) + 50
                        crnt_rule += 1
                        v.set(v.get() + ".")
                        root.update_idletasks()
                except IndexError:
                    pass
            if len(col_width) > 0:  # Set Column / Widths
                for rule in col_width.items():
                    worksheet.set_column(rule[0], int(rule[1]))
                    if not _test:
                        progress['value'] = ((
                            (crnt_rule / f_rule_cnt) * 100) / 2) + 50
                        crnt_rule += 1
                        v.set(v.get() + ".")
                        root.update_idletasks()
            try:
                writer_orig.save()
            except Exception as e:
                print(e)
                print("File with same criteria already open?")
            if auto_open_var.get() == 1:
                # Auto-open the output in the platform's default app.
                try:
                    if platform == "linux" or platform == "linux2":
                        opener = "open" if sys.platform == "darwin" else "xdg-open"
                        subprocess.call([opener, out_d_and_n])
                    else:
                        os.startfile(out_d_and_n, 'open')
                except:
                    print(
                        'Error while trying to open application\nPlease set default xlsx application'
                    )
            end = time.time()
            print('-------' + str(end - start) + '-------')
        else:
            end = time.time()
            print('-------' + str(end - start) + '-------')
        print('done')
        if not _test:
            root.destroy()
    except PermissionError as e:
        # Output file locked (open in Excel) — tell the user to close it.
        print(str(e)[:28] + ": Close File Before Searching")
def open_func(entry, orig_headers, inp_options, start, skip_rows, skip_cols,
              name, total_lines, func=0):
    """
    Function for applying input settings to Open

    :param entry: File Name and Directory
    :param orig_headers: original headers
    :param inp_options: Input Settings ;)
    :param start: Start Time
    :param skip_rows: header row offset detected by the header function
    :param skip_cols: leading columns to drop
    :param name: replacement header names (None when file has its own)
    :param total_lines: line count used to scale the progress bar
    :param func: 0=CSVw/Delimiter,1=CSV,2=Excel
    :return: Dataframe, Dictionary of Column/FillVal's
    """
    # NOTE(review): this module-level copy references `entry1`, `_test`,
    # `root`, `readExcel`, `reduce_mem_usage` and `Retrieve_R`, none of
    # which are parameters or locals here — it appears to be a duplicate
    # of the nested copy inside open_file; verify it is actually called.
    global var
    temp_df = []
    new_field = GenFuncs.strip_dir(entry1)
    if not _test:
        progress = Progressbar(root, orient=HORIZONTAL, length=100,
                               mode='determinate')
        progress.pack(fill=X)
        v = StringVar()
        Label(root, text=new_field).pack()
        Label(root, textvariable=v).pack()
    # Unpack the general input settings.
    gen_rules = inp_options[0]
    delimiter = gen_rules['Delimiter']
    terminator = gen_rules['Terminator']
    header_line = gen_rules['Header Line']
    index_col = gen_rules['Index Column']
    chunk = gen_rules['Chunk']
    verbose = gen_rules['Verbose']
    header_func = gen_rules['Header Func']
    var = verbose  # module-level flag read by verb_print
    line_count = 0
    only_cols = inp_options[1]
    dtypes = inp_options[2]
    head_func_dtypes = inp_options[3]
    if len(inp_options) > 5:
        # Main-window criteria were appended: filter rows while opening.
        search_col = inp_options[4]
        real_l = inp_options[5]
        filter_results = True
    else:
        filter_results = False
    new_field = GenFuncs.strip_dir(entry)
    # Headers with surrounding whitespace stripped, for fuzzy matching.
    stripped_headers = []
    for item in orig_headers:
        try:
            stripped_headers.append(item.strip())
        except AttributeError:
            # Non-string header (e.g. int) — keep as-is.
            stripped_headers.append(item)
    # Re-key the user's dtype rules onto the file's actual header spelling.
    new_dtypes = {}
    if dtypes is not None:
        for key, value in dtypes.items():
            if key in orig_headers:
                new_dtypes[key] = value
            elif key.strip() in orig_headers:
                new_dtypes[key.strip()] = value
            elif key in stripped_headers:
                ind = stripped_headers.index(key)
                new_dtypes[orig_headers[ind]] = value
            elif key.strip() in stripped_headers:
                ind = stripped_headers.index(key.strip())
                new_dtypes[orig_headers[ind]] = value
            else:
                print(key + ':not found in ' + new_field)
    if new_dtypes == {}:
        new_dtypes = None
    # Same fuzzy matching for the "only open these columns" list.
    new_only_cols = []
    if only_cols is not None:
        for item in only_cols:
            if item in orig_headers:
                new_only_cols.append(item)
            elif item.strip() in orig_headers:
                new_only_cols.append(item.strip())
            elif item in stripped_headers:
                ind = stripped_headers.index(item)
                new_only_cols.append(orig_headers[ind])
            elif item.strip() in stripped_headers:
                ind = stripped_headers.index(item.strip())
                new_only_cols.append(orig_headers[ind])
            else:
                print(item + ':not found in ' + new_field)
    if only_cols is not None and name is None:
        # Branch 1: restrict to selected columns, file supplies headers.
        try:
            header_line = skip_rows
            if func == 0:
                for gm_chunk in pd.read_csv(entry, sep=delimiter,
                                            chunksize=chunk,
                                            header=header_line,
                                            index_col=index_col,
                                            usecols=new_only_cols,
                                            dtype=new_dtypes,
                                            verbose=verbose,
                                            lineterminator=terminator,
                                            low_memory=False):
                    line_count += gm_chunk.shape[0]
                    temp_df.append(gm_chunk)
                    if not _test:
                        progress['value'] = (line_count / total_lines) * 100
                        v.set(str(line_count) + " : " + str(total_lines))
                        root.update_idletasks()
                data = pd.concat(temp_df, axis=0, sort=False,
                                 ignore_index=True)
            elif func == 1:
                for gm_chunk in pd.read_csv(entry, header=header_line,
                                            chunksize=chunk,
                                            index_col=index_col,
                                            usecols=new_only_cols,
                                            dtype=new_dtypes,
                                            verbose=verbose,
                                            lineterminator=terminator,
                                            low_memory=False):
                    line_count += gm_chunk.shape[0]
                    temp_df.append(gm_chunk)
                    if not _test:
                        progress['value'] = (line_count / total_lines) * 100
                        v.set(str(line_count) + " : " + str(total_lines))
                        root.update_idletasks()
                data = pd.concat(temp_df, axis=0, sort=False,
                                 ignore_index=True)
            elif func == 2:
                settings = [
                    entry, header_line, name, index_col, new_only_cols,
                    new_dtypes, skip_rows, verbose, line_count
                ]
                data = readExcel(entry, chunk, progress, v, 0, settings)
            try:
                data.columns = [col.strip() for col in data.columns]
            except AttributeError:  # 'int'object has no attribute 'strip' < - files with int headers
                pass
            if filter_results:
                data = Retrieve_R.esult_frames(data, search_col, real_l,
                                               entry)
            if not data.empty:
                data, NA_list = reduce_mem_usage(data)  # [0]
            else:
                print('no results in 1')
                NA_list = []
            return data, NA_list
        except ValueError as e:
            # NOTE(review): falls through returning None on read errors.
            print(e)
    else:
        # Branch 2: all columns, optionally applying replacement headers.
        try:
            if name != None:
                header_line = 0
            if func == 0:
                for gm_chunk in pd.read_csv(entry, sep=delimiter,
                                            header=header_line, names=name,
                                            chunksize=chunk,
                                            index_col=index_col,
                                            dtype=new_dtypes,
                                            skiprows=skip_rows,
                                            verbose=verbose,
                                            lineterminator=terminator,
                                            low_memory=False):
                    line_count += gm_chunk.shape[0]
                    temp_df.append(gm_chunk)
                    if not _test:
                        progress['value'] = (line_count / total_lines) * 100
                        v.set(str(line_count) + " : " + str(total_lines))
                        root.update_idletasks()
                data = pd.concat(temp_df, axis=0, sort=False,
                                 ignore_index=True)
            elif func == 1:
                for gm_chunk in pd.read_csv(entry, header=header_line,
                                            names=name, chunksize=chunk,
                                            index_col=index_col,
                                            dtype=new_dtypes,
                                            skiprows=skip_rows,
                                            verbose=verbose,
                                            lineterminator=terminator,
                                            low_memory=False):
                    line_count += gm_chunk.shape[0]
                    temp_df.append(gm_chunk)
                    if not _test:
                        progress['value'] = (line_count / total_lines) * 100
                        v.set(str(line_count) + " : " + str(total_lines))
                        root.update_idletasks()
                data = pd.concat(temp_df, axis=0, sort=False,
                                 ignore_index=True)
            elif func == 2:
                settings = [
                    entry, header_line, name, index_col, new_only_cols,
                    new_dtypes, skip_rows, verbose, line_count
                ]
                data = readExcel(entry, chunk, progress, v, 1, settings)
            if skip_cols > 0:
                # NOTE(review): drop() result is discarded here (no
                # inplace/assign) — looks like a latent no-op; confirm.
                for i in range(skip_cols):
                    data.drop(data.columns[1], axis=1)
            if name != None and len(new_only_cols) > 0:
                data = data[new_only_cols]
            try:
                data.columns = [col.strip() for col in data.columns]
            except AttributeError:  # 'int'object has no attribute 'strip' < - files with int headers
                pass
            if filter_results:
                data = Retrieve_R.esult_frames(data, search_col, real_l,
                                               entry)
            if not data.empty:
                data, NA_list = reduce_mem_usage(data)  # [0]
            else:
                print('no results due to header func criteria')
                NA_list = []
            end = time.time()
            print('-------' + str(end - start) + '-------')
            return data, NA_list
        except ValueError as e:
            print(e)
def open_file(entry1, inp_options1, root, _test=False):
    """
    Open a single data file (csv / xls(x) / h5) applying the input options.

    :param entry1: Directory/FileName
    :param inp_options1: Input options
    :param root: tkinter parent for progress widgets
    :param _test: when True, skip all tkinter progress-bar work
    :return: DataFrame, Directory/FileName, Dictionary of Column/FillVal's
        (or an empty DataFrame and 'non_val' on failure)
    """
    new_field = GenFuncs.strip_dir(entry1)
    print('Opening ' + new_field)
    start1 = time.time()

    def get_num_lines(file_path):
        # Count lines via mmap (fast for large files).
        fp = open(file_path, "r+")
        buf = mmap.mmap(fp.fileno(), 0)
        lines = 0
        while buf.readline():
            lines += 1
        fp.close()
        return lines

    def readExcel(file_name, nrows, progress, v, func=0, sets=[],
                  line_count=0):
        # Chunked Excel reader: repeatedly read `nrows` rows until a chunk
        # comes back empty, then re-attach the header row's column names.
        # NOTE(review): mutable default `sets=[]` — harmless here since it
        # is never mutated, but worth cleaning up.
        xl = pd.ExcelFile(file_name)
        sheetname = xl.sheet_names[0]
        df_header = pd.read_excel(file_name, sheet_name=sheetname, nrows=1)
        chunks = []
        skiprows = 0
        while True:
            if func == 0:
                df_chunk = pd.read_excel(file_name, sheet_name=sheetname,
                                         header=sets[1], index_col=sets[3],
                                         usecols=sets[4], dtype=sets[5],
                                         skiprows=skiprows, nrows=nrows,
                                         verbose=sets[7])
            else:
                df_chunk = pd.read_excel(file_name, sheet_name=sheetname,
                                         header=sets[1], names=sets[2],
                                         index_col=sets[3], dtype=sets[5],
                                         skiprows=skiprows, nrows=nrows,
                                         verbose=sets[7])
            skiprows += nrows
            if not df_chunk.shape[0]:
                break
            else:
                chunks.append(df_chunk)
                line_count += df_chunk.shape[0]
                if not _test:
                    progress['value'] = (line_count / total_lines) * 100
                    v.set(str(line_count) + " : " + str(total_lines))
                    root.update_idletasks()
        df_chunks = pd.concat(chunks, sort=False)
        columns = {
            i: col
            for i, col in enumerate(df_header.columns.tolist())
        }
        df_chunks.rename(columns=columns, inplace=True)
        return df_chunks

    def reduce_mem_usage(props):
        """
        99% Arjan's original code - if you're reading this Arjan - It was
        super easy to implement, Thank you!
        https://www.kaggle.com/arjanso/reducing-dataframe-memory-size-by-65

        :param props: DataFrame
        :return: Slimmed DataFrame, Fill_Val Dictionary
        """
        start_mem_usg = props.memory_usage().sum() / 1024**2
        verb_print(("Memory usage of properties dataframe is :",
                    start_mem_usg, " MB"))
        NAlist = {
        }  # Keeps track of columns that have missing values filled in.
        for col in props.columns:
            if props[col].dtype != object and props[
                    col].dtype != 'datetime64[ns]':  # Exclude strings and dates
                # Print current column type
                verb_print("******************************")
                verb_print(("Column: ", col))
                verb_print(("dtype before: ", props[col].dtype))
                # make variables for Int, max and min
                IsInt = False
                mx = props[col].max()  # Add multiprocessing?
                mn = props[col].min()  # Add multiprocessing?
                # Integer does not support NA, therefore, NA needs to be filled
                if not np.isfinite(props[col]).all():
                    # Pick a filler value not already present in the column.
                    fill_val = 0
                    for i in range(1, 10000, 3):
                        if i not in np.unique(props[col].values):
                            fill_val = i
                            NAlist[col] = fill_val
                            break
                    props[col].fillna(fill_val, inplace=True)
                # test if column can be converted to an integer
                asint = props[col].fillna(0).astype(np.int64)
                result = (props[col] - asint)
                result = result.sum()
                if result > -0.01 and result < 0.01:
                    IsInt = True
                # Make Integer/unsigned Integer datatypes
                if IsInt:
                    if mn >= 0:
                        if mx < 255:
                            props[col] = props[col].astype(np.uint8)
                        elif mx < 65535:
                            props[col] = props[col].astype(np.uint16)
                        elif mx < 4294967295:
                            props[col] = props[col].astype(np.uint32)
                        else:
                            props[col] = props[col].astype(np.uint64)
                    else:
                        if mn > np.iinfo(np.int8).min and mx < np.iinfo(
                                np.int8).max:
                            props[col] = props[col].astype(np.int8)
                        elif mn > np.iinfo(np.int16).min and mx < np.iinfo(
                                np.int16).max:
                            props[col] = props[col].astype(np.int16)
                        elif mn > np.iinfo(np.int32).min and mx < np.iinfo(
                                np.int32).max:
                            props[col] = props[col].astype(np.int32)
                        elif mn > np.iinfo(np.int64).min and mx < np.iinfo(
                                np.int64).max:
                            props[col] = props[col].astype(np.int64)
                # Make float datatypes 32 bit
                else:
                    props[col] = props[col].astype(np.float32)
                # Print new column type
                verb_print(("dtype after: ", props[col].dtype))
                verb_print("******************************")
            # Categorize Object/string Columns if unique values is less than 50%
            else:
                # Print current column type
                verb_print("******************************")
                verb_print(("Column: ", col))
                verb_print(("dtype before: ", props[col].dtype))
                num_unique = len(props[col].unique())
                num_total = len(props[col])
                if num_unique / num_total < 0.5:
                    props[col] = props[col].astype('category')
                # Print new column type
                verb_print(("dtype after: ", props[col].dtype))
                verb_print("******************************")
        # Print final result
        verb_print("___MEMORY USAGE AFTER SHRINK:___")
        mem_usg = props.memory_usage().sum() / 1024**2
        verb_print(("Memory usage is: ", mem_usg, " MB"))
        verb_print(("This is ", 100 * mem_usg / start_mem_usg,
                    "% of the initial size"))
        return props, NAlist

    def verb_print(text):
        """
        Print only if Verbose Input setting is set to true.

        :param text: Text to print
        """
        global var
        if var:
            print(text)

    def col_check(frame_slice, func_dict):
        """
        Function for verifying/searching for header line in opening file
        - from header Func

        :param frame_slice: First 50 rows of the opening file
        :param func_dict: expected header -> dtype mapping
        :return: First Header Row Location, First Header Column Location,
            List of Headers to apply if First header isn't in file
        """
        slice_dict = {}
        slice_key_list = []
        dict_key_list = []
        for col in frame_slice.columns.values:
            try:
                slice_dict[col.strip()] = frame_slice[col].dtype
            except AttributeError:
                slice_dict[col] = frame_slice[col].dtype
            slice_key_list.append(col)
        for key in func_dict:
            dict_key_list.append(key)
        first_header = dict_key_list[0]
        if first_header == slice_key_list[0]:  # first col header matches
            return 0, 0, None
        else:  # first col header doesn't match
            print('first col header doesn\'t match')
            current_col = (-1)
            found_col = []
            list1 = frame_slice[frame_slice.isin(
                [first_header])].dropna(how='all').count()
            for i in list1:  # finding what column has the first func_dict header
                current_col += 1
                if i > 0:
                    found_col.append(current_col)
                    print('col match ' + str(current_col))
            if len(
                    found_col
            ) > 0:  # if one of the columns has the first func_dict header
                found_col_series = frame_slice.iloc[:, found_col[
                    0]]  # list of values from row that has first f_d header
                series_count = 0
                found_row = []
                for i in found_col_series:  # finding what row has the func_dict header
                    series_count += 1
                    if i == first_header:
                        found_row.append(series_count)
                        print('row match ' + str(series_count))
                return found_row[0], found_col[0], None
            else:
                # Header not found anywhere: if the column counts line up,
                # return the expected headers so they can be applied.
                name = []
                if len(slice_dict) == len(func_dict):
                    for key, val in func_dict.items():
                        name.append(key)
                return None, 0, name

    def header_function(header_func, df, inp_opts):
        # Apply the Header Func option to a 50-row sample to locate the
        # real header row/column before the full read.
        skip_rows = None
        skip_cols = 0
        name = None
        if inp_opts[0]['Header Func']:
            skip_rows, skip_cols, name = col_check(df, inp_opts[3])
            if skip_cols > 0:
                for i in range(skip_cols):
                    df = df.drop(df.columns[1], axis=1)
            if skip_rows is not None:
                if skip_rows > 0:
                    tem_list = range(skip_rows - 1)
                    df = df.drop(df.index[tem_list])
                    df.columns = df.loc[(skip_rows - 1), :]
        orig_headers = df.columns.values.tolist()
        return skip_rows, skip_cols, name, orig_headers

    def open_func(entry, orig_headers, inp_options, start, skip_rows,
                  skip_cols, name, total_lines, func=0):
        """
        Function for applying input settings to Open

        :param entry: File Name and Directory
        :param orig_headers: original headers
        :param inp_options: Input Settings ;)
        :param start: Start Time
        :param func: 0=CSVw/Delimiter,1=CSV,2=Excel
        :return: Dataframe, Dictionary of Column/FillVal's
        """
        global var
        temp_df = []
        new_field = GenFuncs.strip_dir(entry1)
        if not _test:
            progress = Progressbar(root, orient=HORIZONTAL, length=100,
                                   mode='determinate')
            progress.pack(fill=X)
            v = StringVar()
            Label(root, text=new_field).pack()
            Label(root, textvariable=v).pack()
        # Unpack the general input settings.
        gen_rules = inp_options[0]
        delimiter = gen_rules['Delimiter']
        terminator = gen_rules['Terminator']
        header_line = gen_rules['Header Line']
        index_col = gen_rules['Index Column']
        chunk = gen_rules['Chunk']
        verbose = gen_rules['Verbose']
        header_func = gen_rules['Header Func']
        var = verbose  # flag read by verb_print
        line_count = 0
        only_cols = inp_options[1]
        dtypes = inp_options[2]
        head_func_dtypes = inp_options[3]
        if len(inp_options) > 5:
            # Main-window criteria appended: filter rows while opening.
            search_col = inp_options[4]
            real_l = inp_options[5]
            filter_results = True
        else:
            filter_results = False
        new_field = GenFuncs.strip_dir(entry)
        # Headers with whitespace stripped, for fuzzy matching below.
        stripped_headers = []
        for item in orig_headers:
            try:
                stripped_headers.append(item.strip())
            except AttributeError:
                stripped_headers.append(item)
        # Re-key dtype rules onto the file's actual header spelling.
        new_dtypes = {}
        if dtypes is not None:
            for key, value in dtypes.items():
                if key in orig_headers:
                    new_dtypes[key] = value
                elif key.strip() in orig_headers:
                    new_dtypes[key.strip()] = value
                elif key in stripped_headers:
                    ind = stripped_headers.index(key)
                    new_dtypes[orig_headers[ind]] = value
                elif key.strip() in stripped_headers:
                    ind = stripped_headers.index(key.strip())
                    new_dtypes[orig_headers[ind]] = value
                else:
                    print(key + ':not found in ' + new_field)
        if new_dtypes == {}:
            new_dtypes = None
        # Same fuzzy matching for the selected-columns list.
        new_only_cols = []
        if only_cols is not None:
            for item in only_cols:
                if item in orig_headers:
                    new_only_cols.append(item)
                elif item.strip() in orig_headers:
                    new_only_cols.append(item.strip())
                elif item in stripped_headers:
                    ind = stripped_headers.index(item)
                    new_only_cols.append(orig_headers[ind])
                elif item.strip() in stripped_headers:
                    ind = stripped_headers.index(item.strip())
                    new_only_cols.append(orig_headers[ind])
                else:
                    print(item + ':not found in ' + new_field)
        if only_cols is not None and name is None:
            # Branch 1: restrict to selected columns, file supplies headers.
            try:
                header_line = skip_rows
                if func == 0:
                    for gm_chunk in pd.read_csv(entry, sep=delimiter,
                                                chunksize=chunk,
                                                header=header_line,
                                                index_col=index_col,
                                                usecols=new_only_cols,
                                                dtype=new_dtypes,
                                                verbose=verbose,
                                                lineterminator=terminator,
                                                low_memory=False):
                        line_count += gm_chunk.shape[0]
                        temp_df.append(gm_chunk)
                        if not _test:
                            progress['value'] = (line_count /
                                                 total_lines) * 100
                            v.set(str(line_count) + " : " + str(total_lines))
                            root.update_idletasks()
                    data = pd.concat(temp_df, axis=0, sort=False,
                                     ignore_index=True)
                elif func == 1:
                    for gm_chunk in pd.read_csv(entry, header=header_line,
                                                chunksize=chunk,
                                                index_col=index_col,
                                                usecols=new_only_cols,
                                                dtype=new_dtypes,
                                                verbose=verbose,
                                                lineterminator=terminator,
                                                low_memory=False):
                        line_count += gm_chunk.shape[0]
                        temp_df.append(gm_chunk)
                        if not _test:
                            progress['value'] = (line_count /
                                                 total_lines) * 100
                            v.set(str(line_count) + " : " + str(total_lines))
                            root.update_idletasks()
                    data = pd.concat(temp_df, axis=0, sort=False,
                                     ignore_index=True)
                elif func == 2:
                    settings = [
                        entry, header_line, name, index_col, new_only_cols,
                        new_dtypes, skip_rows, verbose, line_count
                    ]
                    data = readExcel(entry, chunk, progress, v, 0, settings)
                try:
                    data.columns = [col.strip() for col in data.columns]
                except AttributeError:  # 'int'object has no attribute 'strip' < - files with int headers
                    pass
                if filter_results:
                    data = Retrieve_R.esult_frames(data, search_col, real_l,
                                                   entry)
                if not data.empty:
                    data, NA_list = reduce_mem_usage(data)  # [0]
                else:
                    print('no results in 1')
                    NA_list = []
                return data, NA_list
            except ValueError as e:
                # NOTE(review): returns None implicitly on read errors.
                print(e)
        else:
            # Branch 2: all columns, optionally with replacement headers.
            try:
                if name != None:
                    header_line = 0
                if func == 0:
                    for gm_chunk in pd.read_csv(entry, sep=delimiter,
                                                header=header_line,
                                                names=name, chunksize=chunk,
                                                index_col=index_col,
                                                dtype=new_dtypes,
                                                skiprows=skip_rows,
                                                verbose=verbose,
                                                lineterminator=terminator,
                                                low_memory=False):
                        line_count += gm_chunk.shape[0]
                        temp_df.append(gm_chunk)
                        if not _test:
                            progress['value'] = (line_count /
                                                 total_lines) * 100
                            v.set(str(line_count) + " : " + str(total_lines))
                            root.update_idletasks()
                    data = pd.concat(temp_df, axis=0, sort=False,
                                     ignore_index=True)
                elif func == 1:
                    for gm_chunk in pd.read_csv(entry, header=header_line,
                                                names=name, chunksize=chunk,
                                                index_col=index_col,
                                                dtype=new_dtypes,
                                                skiprows=skip_rows,
                                                verbose=verbose,
                                                lineterminator=terminator,
                                                low_memory=False):
                        line_count += gm_chunk.shape[0]
                        temp_df.append(gm_chunk)
                        if not _test:
                            progress['value'] = (line_count /
                                                 total_lines) * 100
                            v.set(str(line_count) + " : " + str(total_lines))
                            root.update_idletasks()
                    data = pd.concat(temp_df, axis=0, sort=False,
                                     ignore_index=True)
                elif func == 2:
                    settings = [
                        entry, header_line, name, index_col, new_only_cols,
                        new_dtypes, skip_rows, verbose, line_count
                    ]
                    data = readExcel(entry, chunk, progress, v, 1, settings)
                if skip_cols > 0:
                    # NOTE(review): drop() result is discarded (no inplace
                    # or re-assignment) — looks like a latent no-op.
                    for i in range(skip_cols):
                        data.drop(data.columns[1], axis=1)
                if name != None and len(new_only_cols) > 0:
                    data = data[new_only_cols]
                try:
                    data.columns = [col.strip() for col in data.columns]
                except AttributeError:  # 'int'object has no attribute 'strip' < - files with int headers
                    pass
                if filter_results:
                    data = Retrieve_R.esult_frames(data, search_col, real_l,
                                                   entry)
                if not data.empty:
                    data, NA_list = reduce_mem_usage(data)  # [0]
                else:
                    print('no results due to header func criteria')
                    NA_list = []
                end = time.time()
                print('-------' + str(end - start) + '-------')
                return data, NA_list
            except ValueError as e:
                print(e)

    # ---- dispatch on file extension ----
    if entry1[-4:] == '.csv':
        if inp_options1[0]['Delimiter'] != ',':
            # Custom-delimiter CSV path (func=0).
            try:
                # Previously-opened files have cached header metadata.
                orig_headers, name, skip_rows, skip_cols, total_lines = GenFuncs.file_opened(
                    entry1)
            except TypeError:
                orig_headers = False
            if not orig_headers:
                total_lines = get_num_lines(entry1)
                # Sample 50 rows to sanity-check the delimiter / headers.
                df1 = pd.read_csv(entry1,
                                  sep=inp_options1[0]['Delimiter'],
                                  nrows=50,
                                  low_memory=False)
                if len(df1.columns) == 1:
                    print(
                        'Delimiter Error: Only one row returned.\n File skipped. Please consider changing delimiter in the input settings.'
                    )
                    df_empty = pd.DataFrame({'A': []})
                    end = time.time()
                    print('-------' + str(end - start1) + '-------')
                    return df_empty, 'non_val'
                else:
                    skip_rows, skip_cols, name, orig_headers = header_function(
                        entry1, df1, inp_options1)
                    GenFuncs.update_opened_list(entry1, orig_headers, name,
                                                skip_rows, skip_cols,
                                                total_lines)
                    data, NA_list = open_func(entry1, orig_headers,
                                              inp_options1, start1, skip_rows,
                                              skip_cols, name, total_lines)
                    return data, entry1, NA_list
            else:
                data, NA_list = open_func(entry1, orig_headers, inp_options1,
                                          start1, skip_rows, skip_cols, name,
                                          total_lines)
                return data, entry1, NA_list
        else:
            # Default comma-delimited CSV path (func=1).
            try:
                orig_headers, name, skip_rows, skip_cols, total_lines = GenFuncs.file_opened(
                    entry1)
            except TypeError:
                orig_headers = False
            if not orig_headers:
                total_lines = get_num_lines(entry1)
                df1 = pd.read_csv(entry1, nrows=50, low_memory=False)
                if len(df1.columns) == 1:
                    print(
                        'Delimiter Error: Only one row returned.\n File skipped. Please consider changing delimiter in the input settings.'
                    )
                    df_empty = pd.DataFrame({'A': []})
                    end = time.time()
                    print('-------' + str(end - start1) + '-------')
                    return df_empty, 'non_val'
                else:
                    skip_rows, skip_cols, name, orig_headers = header_function(
                        entry1, df1, inp_options1)
                    GenFuncs.update_opened_list(entry1, orig_headers, name,
                                                skip_rows, skip_cols,
                                                total_lines)
                    data, NA_list = open_func(entry1, orig_headers,
                                              inp_options1, start1, skip_rows,
                                              skip_cols, name, total_lines,
                                              func=1)
                    return data, entry1, NA_list
            else:
                data, NA_list = open_func(entry1, orig_headers, inp_options1,
                                          start1, skip_rows, skip_cols, name,
                                          total_lines, func=1)
                return data, entry1, NA_list
    elif (entry1[-4:] == 'xlsx') or (entry1[-4:] == '.xls') or (
            (entry1[-4:])[:3] == 'xls') or ((entry1[-4:])[:2] == 'xl'):
        # Excel path (func=2).
        file_stats = os.stat(entry1)
        if (
                file_stats.st_size / (1024 * 1024)
        ) > 2:  # check if xls file is larger than 2mb - don't open if so until large file support is added
            print(
                f"{GenFuncs.strip_dir(entry1)}Access is denied",
                'xls Error: Size not supported\nConsider saving the file as .csv\nLarge csv files are supported'
            )  #############################
            df_empty = pd.DataFrame({'A': []})
            end = time.time()
            print('-------' + str(end - start1) + '-------')
            return df_empty, 'non_val'
        else:
            try:
                orig_headers, name, skip_rows, skip_cols, total_lines = GenFuncs.file_opened(
                    entry1)
            except TypeError:
                orig_headers = False
            if not orig_headers:
                total_lines = get_num_lines(entry1)
                df1 = pd.read_excel(entry1, sheet_name=0, nrows=50)
                skip_rows, skip_cols, name, orig_headers = header_function(
                    entry1, df1, inp_options1)
                GenFuncs.update_opened_list(entry1, orig_headers, name,
                                            skip_rows, skip_cols, total_lines)
                data, NA_list = open_func(entry1, orig_headers, inp_options1,
                                          start1, skip_rows, skip_cols, name,
                                          total_lines, func=2)
                return data, entry1, NA_list
            else:
                data, NA_list = open_func(entry1, orig_headers, inp_options1,
                                          start1, skip_rows, skip_cols, name,
                                          total_lines, func=2)
                return data, entry1, NA_list
    elif entry1[-3:] == '.h5':
        # HDF5 path — no header detection, optional criteria filter.
        # NOTE(review): `search_col` / `real_l` are not defined in this
        # scope when filter_results is True — likely a latent NameError;
        # confirm against how h5 files plus main-window criteria are used.
        data = pd.read_hdf(entry1, 'df')
        filter_results = False
        if len(inp_options1) > 5:
            filter_results = True
        if filter_results:
            data = Retrieve_R.esult_frames(data, search_col, real_l, entry1)
        end = time.time()
        print('-------' + str(end - start1) + '-------')
        return data, entry1, {}
    else:
        # Unsupported extension.
        df_empty = pd.DataFrame({'A': []})
        end = time.time()
        print('-------' + str(end - start1) + '-------')
        return df_empty, 'non_val'
def make(self, root=None, fields=None, func=0, func2=0, NAdict=None):
    """Build one of the application's Tk frames, selected by ``func``.

    :param root: Parent frame for the branches that create widgets
        under it (func 1 and 7).
    :param fields: Labels for the entry boxes (func 1) or the list of
        opened file paths (func 7).
    :param func: Which frame to build:
        1 = main-window entry rows, 2 = options Toplevel window,
        3 = general input options, 4 = search-output options header,
        5 = general output options, 6 = general options list frame,
        7 = main-window footer listing opened files, 8 = profiles frame.
    :param func2: func 1 only; 1 = narrow entries labelled with the
        stripped file name, 0 = full-width entries labelled verbatim.
    :param NAdict: Per-branch extras -- func 4: {'in_list',
        'change_func'}; func 6: {'label', 'but_name', 'list_opts',
        'dict/list'}.
    :return: Branch dependent: list of (label, entry) pairs (func 1/3),
        list of (file, checkbutton, intvar) triples (func 7),
        ('usecols', entry) (func 6), otherwise None.
    """
    # The originally-declared 'global' covered the whole function body;
    # hoisted here so every branch's use of these names is explicit.
    global opt_window, footer_1, opt_footer
    # Mutable default arguments ([] / {}) replaced with None sentinels;
    # behaviour for callers that omit them is unchanged.
    if fields is None:
        fields = []
    if NAdict is None:
        NAdict = {}
    if func == 1:
        # Main-window rows: one labelled Entry per field.
        self.entries = []
        for field in fields:
            row = Frame(root)
            if func2 == 1:
                ent = Entry(row, width=2)
                file_name = GenFuncs.strip_dir(field)
            else:
                ent = Entry(row)
                file_name = field
            lab = Label(row, text=file_name, anchor='w')
            row.pack(side=TOP, fill=X, padx=5, pady=2)
            lab.pack(side=LEFT)
            if func2 == 1:
                ent.pack(side=RIGHT)
            else:
                ent.pack(side=RIGHT, expand=YES, fill=X)
            self.entries.append((field, ent))
        return self.entries
    elif func == 2:
        # Options initial window: reuse the Toplevel if one is already
        # open, otherwise build it with an input/output drop-down.
        in_or_out = 'File Input', 'Search Output'
        try:
            win_exists_var = Toplevel.winfo_exists(opt_window)
        except NameError:
            # opt_window not created yet this session.
            win_exists_var = 0
        if win_exists_var != 1:
            opt_window = Toplevel()
            opt_window.title("File_Pal_1.1")
            header = Frame(opt_window)
            body1 = Frame(opt_window)
            footer_1 = Frame(opt_window)
            variable = StringVar(header)
            variable.set('Click Here')
            w = OptionMenu(header, variable, *in_or_out)
            header.pack()
            variable.trace("w", partial(self.changed_1, widget=variable))
            w.pack(padx=20)
            Label(body1, text=' --- Options --- ').pack()
            body1.pack()
            opt_window.mainloop()
    elif func == 3:
        # General input options frame: text entries for most options,
        # checkbuttons for the boolean ones.
        self.entries = []
        gen_opts = ('Delimiter', 'Terminator', 'Header Line',
                    'Index Column', 'Chunk', 'CPU Cores', 'Verbose',
                    'Header Func', 'Main Win Criteria')
        check_opts = ('Verbose', 'Header Func', 'Main Win Criteria')
        gen_def = {
            'Delimiter': ',',
            'Terminator': 'DV',
            'Header Line': 'DV',
            'Index Column': 'DV',
            'Chunk': 'DV',
            'CPU Cores': 1,
            'Verbose': 0
        }
        temp_dict = GenFuncs.gen_set()  # previously saved settings
        for opt in gen_opts:
            if opt not in check_opts:
                row = Frame(opt_footer)
                lab = Label(row, width=12, text=opt, anchor='w')
                ent = Entry(row, width=3)
                row.pack(side=TOP, fill=X, padx=5, pady=2)
                lab.pack(side=LEFT)
                ent.pack(side=RIGHT, expand=YES, fill=X)
                # A saved value wins over the shipped default.
                if opt in temp_dict:
                    ent.insert(0, temp_dict[opt])
                elif opt in gen_def:
                    ent.insert(0, gen_def[opt])
                self.entries.append((opt, ent))
            else:
                row = Frame(opt_footer)
                var1 = IntVar()
                if opt == 'Header Func':
                    var1.trace(
                        "w",
                        partial(self.changed_4, root=opt_footer, var=var1))
                ent = Checkbutton(row, text=opt, variable=var1)
                if opt in temp_dict:
                    var1.set(temp_dict[opt])
                elif opt in gen_def:
                    var1.set(gen_def[opt])
                row.pack(side=TOP, fill=X, padx=5, pady=2)
                ent.pack(side=RIGHT)
                self.entries.append((opt, var1))
        last_row = Frame(opt_footer)
        sec_last_row = Frame(opt_footer)
        bload = Button(last_row, text='Save Changes',
                       command=(lambda: self.opt_rule()))
        bload.pack(side=RIGHT)
        breset = Button(sec_last_row, text='Reset',
                        command=(lambda: self.opt_rule(2)))
        breset.pack(side=RIGHT)
        sec_last_row.pack()
        last_row.pack()
        return self.entries
    elif func == 4:
        # Search-output options header: drop-down of NAdict['in_list']
        # wired to NAdict['change_func'].
        in_options = NAdict['in_list']
        header = Frame(footer_1)
        body = Frame(footer_1)
        opt_footer = Frame(footer_1)
        variable = StringVar(header)
        variable.set('Click Here')
        w = OptionMenu(header, variable, *in_options)
        header.pack()
        variable.trace("w", partial(NAdict['change_func'],
                                    widget=variable))
        w.pack()
        body.pack()
    elif func == 5:
        # General output options frame: font style/size and decimal
        # places, plus export/import and output-directory buttons.
        var_file = shelve.open(
            os.path.join(os.path.expanduser('~'), 'var_file'))
        try:
            dir_loc = var_file['dir_location']
        except KeyError:
            dir_loc = tempfile.gettempdir()
        try:
            glob_dec_place = var_file['glob_dec_place']
        except KeyError:
            glob_dec_place = False
        try:
            font_type_size = var_file['font_rules']
        except KeyError:
            font_type_size = {}
        var_file.close()
        row1 = Frame(opt_footer)
        lab = Label(row1, width=8, text='Font Style', anchor='w')
        ent = Entry(row1, width=8)
        lab.pack(side=LEFT)
        ent.pack(side=LEFT)
        lab1 = Label(row1, width=7, text='Font Size', anchor='w')
        ent1 = Entry(row1, width=2)
        lab1.pack(side=LEFT)
        ent1.pack(side=LEFT)
        lab2 = Label(row1, width=11, text='Decimal Places', anchor='w')
        ent2 = Entry(row1, width=2)
        lab2.pack(side=LEFT)
        ent2.pack(side=LEFT)
        row1.pack(side=TOP, padx=5, pady=2)
        if font_type_size != {}:
            # font_rules is stored as a one-entry {style: size} mapping.
            ent.insert(0, str(list(font_type_size.keys())[0]))
            ent1.insert(0, str(list(font_type_size.values())[0]))
        # The stored value may be False or the literal string 'False';
        # either means "no global decimal places".
        if glob_dec_place != False and glob_dec_place.strip() != 'False':
            ent2.insert(0, str(glob_dec_place))
        row2 = Frame(opt_footer)
        bsave = Button(row2, text='Save Changes',
                       command=(lambda: self.save_font(ent, ent1, ent2)))
        bsave.pack(side=LEFT)
        bexport = Button(
            row2, text='Export Settings',
            command=(lambda: self.exp_imp_sets(row2, dir_loc, 2)))
        bexport.pack(side=LEFT)
        bimport = Button(
            row2, text='Import Settings',
            command=(lambda: self.exp_imp_sets(row2, dir_loc)))
        bimport.pack(side=LEFT)
        row2.pack()
        last_row = Frame(opt_footer)
        bchange_dir = Button(
            last_row, text='Output Dir',
            command=(lambda: self.update_dir(last_row, opt_footer)))
        bchange_dir.pack(side=LEFT)
        Label(last_row, text=dir_loc, anchor='w').pack()
        last_row.pack(fill=X)
    elif func == 6:
        # General options list frame.
        #   NAdict['label']     -- label shown above the list
        #   NAdict['but_name']  -- button label
        #   NAdict['list_opts'] -- drop-down options
        #   NAdict['dict/list'] -- shelve key holding the saved rules
        row = Frame(opt_footer)
        row2 = Frame(opt_footer)
        self.footer = Frame(opt_footer)
        lab = Label(row2, text=NAdict['label'])
        ent = Entry(row, width=13)
        variable = StringVar(row)
        bsave = Button(
            row, text=NAdict['but_name'],
            command=(lambda: self.but_func(
                ent, opt_footer, NAdict['dict/list'], variable)))
        row.pack(side=TOP, fill=X, padx=5, pady=2)
        row2.pack(side=TOP, fill=X, padx=5, pady=2)
        lab.pack(side=TOP)
        bsave.pack(side=LEFT)
        ent.pack(side=LEFT)
        if NAdict['list_opts'] != []:
            variable.set(NAdict['list_opts'][0])
            w = OptionMenu(row, variable, *NAdict['list_opts'])
            w.pack(side=LEFT)
        var_file = shelve.open(
            os.path.join(os.path.expanduser('~'), 'var_file'))
        try:
            rules = var_file[NAdict['dict/list']]
            self.print_lab(rules, NAdict['dict/list'])
            breset = Button(
                self.footer, text='Reset List',
                command=(lambda: self.reset_col_list(NAdict['dict/list'])))
            breset.pack()
        except KeyError:
            # No saved rules yet -- nothing to show or reset.
            pass
        var_file.close()
        self.footer.pack()
        return 'usecols', ent
    elif func == 7:
        # Main-window footer: a checkbutton plus a 'Headers' button per
        # opened file.
        entries = []
        for field in fields:
            new_field = 'Search: ' + field.split('/')[-1]
            vrow = Frame(root)
            var1 = IntVar()
            var1.set(1)  # files start checked (included in searches)
            ent = Checkbutton(vrow, text=new_field, variable=var1)
            # Bind the current field as a default argument so each
            # button opens the headers of its own file.
            bx = Button(vrow, text='Headers',
                        command=(lambda e=field: header_button(key=e)))
            vrow.pack(side=TOP, fill=X, padx=5, pady=2)
            ent.pack(side=LEFT)
            bx.pack(side=RIGHT)
            entries.append((field, ent, var1))
        return entries
    elif func == 8:
        # Output-profile frame: save, import and delete named bundles
        # of the output settings kept in the shelve file.
        var_file = shelve.open(
            os.path.join(os.path.expanduser('~'), 'var_file'))
        try:
            profs = var_file['Profilez']
        except KeyError:
            profs = {}
        var_file.close()

        def _rebuild_frame():
            # Tear down and rebuild this frame so the profile list
            # reflects the latest save/delete.
            global opt_footer, inp_ents
            opt_footer.pack_forget()
            opt_footer.destroy()
            opt_footer = Frame(opt_window)
            opt_footer.pack()
            inp_ents = self.make(opt_footer, func=8)

        def save_prof(name):
            # Snapshot the current output settings under name.get().
            var_file = shelve.open(
                os.path.join(os.path.expanduser('~'), 'var_file'))
            try:
                space_dict = var_file['col_spacing']
            except KeyError:
                space_dict = {}
            try:
                zero_dict = var_file['lead_zeroes']
            except KeyError:
                zero_dict = {}
            try:
                font_dict = var_file['font_rules']
            except KeyError:
                font_dict = {}
            try:
                dec_dict = var_file['decimal_places']
            except KeyError:
                dec_dict = {}
            # BUG FIX: the original tested glob_dec.strip() before
            # glob_dec was ever assigned, so the bare except always
            # saved False and silently dropped the setting.  Read the
            # value first, then treat the string 'False' as disabled.
            try:
                glob_dec = var_file['glob_dec_place']
                if glob_dec.strip() == 'False':
                    glob_dec = False
            except (KeyError, AttributeError):
                # Missing key, or a stored non-string (e.g. False).
                glob_dec = False
            profs[name.get()] = [
                space_dict, zero_dict, font_dict, dec_dict, glob_dec,
                name.get()
            ]
            var_file['Profilez'] = profs
            var_file.close()
            _rebuild_frame()

        def imp_prof(name):
            # Load a saved profile back into the live settings.
            var_file = shelve.open(
                os.path.join(os.path.expanduser('~'), 'var_file'))
            loadin_sets = profs[name]
            var_file['col_spacing'] = loadin_sets[0]
            var_file['lead_zeroes'] = loadin_sets[1]
            var_file['font_rules'] = loadin_sets[2]
            var_file['decimal_places'] = loadin_sets[3]
            var_file['glob_dec_place'] = loadin_sets[4]
            var_file.close()

        def del_prof(name):
            # Remove one profile and refresh the frame.
            var_file = shelve.open(
                os.path.join(os.path.expanduser('~'), 'var_file'))
            temp_profs = var_file['Profilez']
            del temp_profs[name]
            var_file['Profilez'] = temp_profs
            var_file.close()
            _rebuild_frame()

        for prof in profs.values():
            rowx = Frame(opt_footer)
            rowx.pack()
            # BUG FIX: bind the profile name as a default argument.
            # The original closed over the loop variable, so every
            # import button loaded the LAST profile in the list (the
            # delete button already used the correct idiom).
            import_settings = Button(
                rowx, text=prof[5],
                command=(lambda e=prof[5]: imp_prof(e)))
            import_settings.pack(side=LEFT)
            del_but = Button(rowx, text="Delete",
                             command=(lambda e=prof[5]: del_prof(e)))
            del_but.pack(side=LEFT)
        brow = Frame(opt_footer)
        brow.pack()
        ent2 = Entry(brow, width=15)
        lbut = Button(brow, text="Save Profile",
                      command=(lambda: save_prof(ent2)))
        lbut.pack(side=LEFT)
        ent2.pack(side=LEFT)