def make_excel_file(total_data_dic, filename, folder, change_sheet_func=None): """ Gets a dic of lists of lists of data to output into cells. Each lists will give a new row, and each element in the lists will create a new cell in the next column. The key of the dic will be the sheetname, while the prop will be first level of lists. :param data_list: dictionary of lists of lists of data :param filename: string of the excel filename :param folder: string of where the folder will be :return: None """ ffile.move_dir(folder) wb = openpyxl.Workbook() for sheetname, excel_data_lists in total_data_dic.items(): sheet = wb.create_sheet(title=sheetname) for y, row_data_lists in enumerate(excel_data_lists): rowNum = y + 1 for x, data in enumerate(row_data_lists): columnNum = x + 1 sheet.cell(row=rowNum, column=columnNum).value = data if change_sheet_func != None: change_sheet_func(sheet) wb.remove_sheet(wb.get_sheet_by_name('Sheet')) wb.save(filename) ffile.dir_back()
def output_conv_ups_data(ups_data): # outputs ups data into excel file ffile.move_dir("excel") wb = openpyxl.Workbook() sheet = wb.active i = 1 for date, data_dic in ups_data.items(): sheet['A' + str(i)] = date i += 1 for tracking_num, data_obj in data_dic.items(): sheet['A' + str(i)] = tracking_num i += 1 simple_data_list = data_obj.get_simple_datalist_str() for s in simple_data_list: sheet['A' + str(i)] = s i += 1 sheet['A' + str(i)] = "detail" i += 1 detail_data_list = data_obj.get_detail_datalist_str() for d in detail_data_list: sheet['A' + str(i)] = d i += 1 wb.save('ups_data.xlsx') ffile.dir_back()
def process_pdfs(): index_dic = get_pdf_index_dic() print("INDEX", index_dic) for pdf_name, rcv_name_list in index_dic.items(): ffile.move_dir('unread_pdf') msg = "Processing " + pdf_name print(msg) pdf_filename = pdf_name + '.pdf' print(pdf_filename) try: pdfFile = open(pdf_filename, 'rb') except FileNotFoundError: print("FILE NOT FOUND " + pdf_filename) print(os.getcwd()) continue pdfReader = PyPDF2.PdfFileReader(pdfFile) ffile.dir_back() ffile.move_dir('proc_pdf/') pdf_pages = pdfReader.numPages num_pdf_names = len(rcv_name_list) if pdf_pages != num_pdf_names: raise Exception('num_pages != pdf_pages of file ' + pdf_filename) prev_rcv_filename = "" prev_pdfwriter = PyPDF2.PdfFileWriter() for i, rcv_name in enumerate(rcv_name_list): page = i + 1 if rcv_name != "-": if page != 1: # print(page, "TEST",prev_pdfwriter, prev_rcv_filename) #Write previous RCV PDF make_pdf_file(prev_pdfwriter, prev_rcv_filename) prev_rcv_filename = 'RCV' + rcv_name prev_pdfwriter = PyPDF2.PdfFileWriter() pageObj = pdfReader.getPage(page - 1) prev_pdfwriter.addPage(pageObj) make_pdf_file(prev_pdfwriter, prev_rcv_filename) ffile.dir_back()
def convert_file(filename, folder_name, new_filename ='fedex_rates.xlsx', annual_charge = 0.00): """ Opens an excel file containing standard base Fedex rates and will convert it to the appropriate values based on the discount and the absolute minimum. Will change the excel file itself. The excel filel will have first row containing zone information, and the first column containing weights. """ weight_column = 'A' ffile.move_dir(folder_name) wb = openpyxl.load_workbook(filename) sheet_names_list = wb.get_sheet_names() for sheet_name in sheet_names_list: sheet = wb.get_sheet_by_name(sheet_name) calc_func = match_sheet_names(sheet_name) max_rows = sheet.max_row max_columns = sheet.max_column col_letters_list = column_letters[:max_columns] # Indexes the zone that each column corresponds to (by index) column_index = {"weight": weight_column,} for letter in col_letters_list: zone_value = sheet[letter + '1'].value if letter == 'A': continue zone_dic = find_zones(zone_value) process_column_zone_index(column_index, zone_dic, letter) for row_num in range(2, max_rows + 1): row = str(row_num) weight = sheet[weight_column + row].value for zone, column_letter in column_index.items(): if zone != "weight": cell_loc = column_letter + row rate = sheet[cell_loc].value #blank rates are returned back if rate == '-': new_rate = rate else: new_rate = process_rate(calc_func, zone, weight, rate, annual_charge) # print(rate, new_rate) sheet[cell_loc] = new_rate else: # print("weight: " + str(sheet[column_letter + row].value)) continue wb.save(new_filename) ffile.dir_back()
def open_data_excel(): ffile.move_dir("excel_data") mypath = '.' cust_id_letter = 'G' cbm_col_letter = 'O' container_id_letter = 'P' files_list = [f for f in os.listdir(mypath) if isfile(join(mypath, f))] total_data_dic = {} for filename in files_list: filename_encoded = filename.encode('utf8') wb = openpyxl.load_workbook(filename) sheetname_list = wb.get_sheet_names() file_data_dic = total_data_dic[filename_encoded] = {} for sheetname in sheetname_list: sheet = wb.get_sheet_by_name(sheetname) sheetname_encoded = sheetname.encode('utf8') data_dic = {} file_data_dic[sheetname_encoded] = data_dic max_row = sheet.max_row prev_pallet_id = "" for row in range(2, max_row + 1): row_str = str(row) cust_id = sheet[cust_id_letter + row_str].value if cust_id == None: continue if cust_id not in data_dic: data_dic[cust_id] = [] try: cbm = float(sheet[cbm_col_letter + row_str].value) except (ValueError, TypeError): cbm = None pallet_id = sheet[pallet_id_letter + row_str].value if pallet_id == None: pallet_id = prev_pallet_id data_dic[cust_id].append({"cbm": cbm, "pallet_id": pallet_id}) prev_pallet_id = pallet_id ffile.dir_back() return total_data_dic
def process_excel_fedex(file_name="fedex_rates.xlsx"): ffile.move_dir("data") wb = openpyxl.load_workbook(file_name) sheetnames_list = wb.get_sheet_names() rate_dic = {} for delivery_name, para_list in fedex_rate_proc_index.items(): sheet = wb.get_sheet_by_name(delivery_name) rates = proc_sheet_for_rates(sheet, *para_list) # print(delivery_name) # print(rates) rate_dic[delivery_name] = rates # sheet = wb.get_sheet_by_name('Standard Overnight') # rates = proc_sheet_for_rates(sheet) # print(rates) ffile.dir_back() return rate_dic
def output_raw_data(raw_ups_data): # outputs raw ups data into excel file ffile.move_dir("excel") wb = openpyxl.Workbook() sheet = wb.active i = 1 for tracking_num, data_types in raw_ups_data.items(): sheet['A' + str(i)] = tracking_num i += 1 for data_type, data_list in data_types.items(): sheet['A' + str(i)] = data_type i += 1 for data in data_list: sheet['A' + str(i)] = str(data) # for j, key in enumerate(data): # sheet[alphabet_list[j * 2] + str(i)] = key # sheet[alphabet_list[j * 2 + 1] + str(i)] = str(data[key]) i += 1 wb.save('ups_raw_data.xlsx') ffile.dir_back()
def process_excel_fedex(earned_num=0): """ Input: earned_num(float), signifying the earned discount obtained. Output: rate_dic -> [ sheet/ delivery name ] [ weight(dic) ][zone] = rate(float) """ ffile.move_dir("fedex_rates") earned_string = '' if earned_num == 0: earned_string = 'no' elif earned_num == 1: earned_string = '1st' elif earned_num == 2: earned_string = '2nd' elif earned_num == 3: earned_string = '3rd' elif earned_num == 4: earned_string = '4th' elif earned_num == 5: earned_string = '5th' rate_dic = {} fedex_rate_excel_filename = "fedex_discounted_rates_" + earned_string + "_earned.xlsx" wb = openpyxl.load_workbook(fedex_rate_excel_filename) sheet_names_list = wb.get_sheet_names() for sheet_name in sheet_names_list: sheet = wb.get_sheet_by_name(sheet_name) rates = proc_sheet_for_rates(sheet) rate_dic[sheet_name] = rates ffile.dir_back() return rate_dic
def convert_file(filename, folder_name, new_filename, annual_charge): ffile.move_dir(folder_name) wb = openpyxl.load_workbook(filename) sheet_names = wb.get_sheet_names() for sheet_name in sheet_names: sheet = wb.get_sheet_by_name(sheet_name) zones_dic = excel_helper.get_zones(sheet) calc_func = get_calc_func(sheet_name) # print(zone_dic) num_rows = sheet.max_row weight_column = excel_helper.weight_column for row_num in range(int(excel_helper.zone_row) + 1, num_rows + 1): weight = sheet[weight_column + str(row_num)].value # print(weight) for column_letter, zone_dic in zones_dic.items(): if column_letter == weight_column: continue zone = zone_dic["start"] cell_loc = column_letter + str(row_num) # print(column_letter, zone_dic) full_rate = sheet[cell_loc].value # print(full_rate) new_rate = process_rate(calc_func, zone, weight, full_rate, annual_charge) sheet[cell_loc] = new_rate wb.save(new_filename) ffile.dir_back()
def read_simple_ups(simple_ups_filename, folder_name): fieldnames_index = {} ffile.move_dir(folder_name) total_simple_ups_data = {} def filter_data(simple_ups): # Input: detail_ups_dic # Return True if this detail_ups_dic should be added. # False otherwise. tracking_num = simple_ups["tracking_num"] if tracking_num == "": return False else: return True def get_fieldnames(row): fieldnames_dic = {} for infile_name, file_fieldname in simple_ups_fieldnames.items(): fieldnames_dic[infile_name] = row.index(file_fieldname) return fieldnames_dic def extract_data(row): ups_simple_dic = {} for fieldname, column in fieldnames_index.items(): if fieldname == "billed_charge": ups_simple_dic[ fieldname] = excel_helper.convert_charge_string_to_float( row[column]) else: ups_simple_dic[fieldname] = row[column] return ups_simple_dic with open(simple_ups_filename) as f_simple: reader = csv.reader(f_simple) # find row with fieldnames & set fieldnames_index for row in reader: try: if "Account Number" in row: fieldnames_index = get_fieldnames(row) break except IndexError: pass tracking_num_column = fieldnames_index["tracking_num"] for row in reader: tracking_num = row[tracking_num_column] # print(tracking_num, simple_ups_filename) simple_ups_data = extract_data(row) if not filter_data(simple_ups_data): continue if tracking_num not in total_simple_ups_data: total_simple_ups_data[tracking_num] = [ simple_ups_data, ] else: total_simple_ups_data[tracking_num].append(simple_ups_data) ffile.dir_back() return total_simple_ups_data
def read_detail_ups(detail_ups_filename, folder_name): total_detail_ups_data = {} fieldnames_index = {} prev_track_num = "" def extract_data_from_row(row): # Extracts data from the row of csv file using # fieldnames_index dic which gives which and what columns to extract ups_detail_dic = {} for fieldname, column_num in fieldnames_index.items(): if fieldname == "billed_charge": ups_detail_dic[ fieldname] = excel_helper.convert_charge_string_to_float( row[column_num]) else: ups_detail_dic[fieldname] = row[column_num] return ups_detail_dic def filter_data(detail_ups): # Input: detail_ups_dic # Return True if this detail_ups_dic should be added. # False otherwise. tracking_num = detail_ups["tracking_num"] billed_charge = detail_ups["billed_charge"] if tracking_num == "": return False elif billed_charge == 0: return False else: return True ffile.move_dir(folder_name) fieldnames_index = get_detail_fieldnames_index() with open(detail_ups_filename) as f_detail: reader = csv.reader(f_detail) for row in reader: detail_ups_dic = extract_data_from_row(row) tracking_num = detail_ups_dic["tracking_num"] # skip those without tracking number or with 0 billed charge if not filter_data(detail_ups_dic): continue if tracking_num not in total_detail_ups_data: total_detail_ups_data[tracking_num] = [[ detail_ups_dic, ]] else: d_list = total_detail_ups_data[tracking_num] if prev_track_num == tracking_num: # print(d_list[len(d_list) -1]) last_ups_detail_list = d_list[len(d_list) - 1] last_ups_detail_list.append(detail_ups_dic) else: d_list.append([ detail_ups_dic, ]) # print(tracking_num, prev_track_num, tracking_num == prev_track_num, tracking_num not in total_detail_ups_data) prev_track_num = tracking_num # print(detail_ups_dic) # print(fieldnames_index) ffile.dir_back() # print(total_detail_ups_data['1Z0019850396816706']) return total_detail_ups_data
def get_pdf_index_dic(): index_dic = {} ffile.move_dir("unread_pdf") try: wb = openpyxl.load_workbook('index.xlsx') except FileNotFoundError: print("index.xlsx was not found") return 0 sheet_list = wb.get_sheet_names() sheetname = sheet_list[0] sheet = wb.get_sheet_by_name(sheetname) max_columns = sheet.max_column cur_year = '' cur_month = '' repeat_page_symbol = '-' for col in range(1, max_columns + 1, 4): pdf_name = str(sheet.cell(row=1, column=col).value) index_dic[pdf_name] = [] num_pages = sheet.cell(row=2, column=col).value if num_pages == None: msg = "No num of pages " + pdf_name + " in column " + str(col) raise Exception(msg) pages_col_num = col + 1 year_col = col + 1 month_col = col + 2 day_rcv_col = col + 3 year = '' month = '' day_rcv = '' for page in range(1, num_pages + 1): year_cell = sheet.cell(row = page, column=year_col).value if year_cell != None: if year_cell < 10: year = "0" + str(year_cell) else: year = str(year_cell) month_cell = sheet.cell(row = page, column=month_col).value if month_cell != None: if month_cell < 10: month = "0" + str(month_cell) else: month = str(month_cell) day_rcv_cell = sheet.cell(row = page, column=day_rcv_col).value day_rcv = str(day_rcv_cell) if day_rcv == repeat_page_symbol: rcv_name = day_rcv else: rcv_name = year + month + day_rcv index_dic[pdf_name].append(rcv_name) ffile.dir_back() return index_dic
from PIL import Image import pytesseract import re import os, time, os.path def get_img_string(filename): if not filename.endswith('.jpg'): msg = filename + " is not a jpg file." raise Exception(msg) img_string = pytesseract.image_to_string(Image.open(filename)) return img_string ffile.move_dir("jpg_raw") file_list = os.listdir() for filename in file_list: img_string = get_img_string(filename) pattern = re.compile("RCV\d+-\d+") re_result = pattern.search(img_string) if re_result != None: new_filename = '' for i in range(0, 100): new_filename = re_result[0] + '_' + str(i) + '.jpg'
def output_fedex_ups_dat(fedex_list_inst, ups_invoice_date, earned): dates_list = fedex_list_inst.get_dates() ffile.move_dir("excel") wb = openpyxl.Workbook() sheet = wb.active i = 1 sheet.freeze_panes = 'A2' UPS_column_width = 30 tracking_num_width = 20 fedex_column_width = 30 sheet.column_dimensions['D'].width = UPS_column_width sheet.column_dimensions['E'].width = tracking_num_width sheet.column_dimensions['F'].width = fedex_column_width header_list = [ "Date", "Zone", "Weight", "UPS", "Track Num/Rate", "Fedex", "FX Rate" ] date_col_in = header_list.index("Date") date_col_letter = alphabet_list[date_col_in] zone_col_in = header_list.index("Zone") zone_col_letter = alphabet_list[zone_col_in] weight_col_in = header_list.index("Weight") weight_col_letter = alphabet_list[weight_col_in] ups_col_in = header_list.index("UPS") ups_col_letter = alphabet_list[ups_col_in] ups_rate_col_in = header_list.index("Track Num/Rate") ups_rate_col_letter = alphabet_list[ups_rate_col_in] fedex_col_in = header_list.index("Fedex") fedex_col_letter = alphabet_list[fedex_col_in] fedex_rate_col_in = header_list.index("FX Rate") fedex_rate_col_letter = alphabet_list[fedex_rate_col_in] for column_num, header in enumerate(header_list): column_letter = alphabet_list[column_num] sheet[column_letter + str(i)] = header i += 1 for date in dates_list: tracking_num_list = fedex_list_inst.get_ups_tracking_nums(date) for tracking_num in tracking_num_list: data_dic = fedex_list_inst.get_first_dataset(date, tracking_num) zone = data_dic["Zone"] date = data_dic["Date"] weight = data_dic["Weight"] sheet[date_col_letter + str(i)] = date sheet[zone_col_letter + str(i)] = zone sheet[weight_col_letter + str(i)] = weight sheet[ups_col_letter + str(i)] = "UPS" sheet[ups_rate_col_letter + str(i)] = tracking_num sheet[fedex_col_letter + str(i)] = "Fedex" i += 1 ups_data_dic = data_dic["ups"] fedex_data_dic = data_dic["fedex"] total_ups_rate = ups_data_dic["Total Charge"] total_fedex_rate = fedex_data_dic["Total Charge"] ups_data_list = ups_data_dic['Charges List'] fedex_data_list = fedex_data_dic['Charges List'] for list_index, ups_rate_dic in enumerate(ups_data_list): # print(ups_data_list, len(ups_data_list)) # print(fedex_data_list, len(fedex_data_list)) fedex_rate_dic = fedex_data_list[list_index] ups_rate = ups_rate_dic["Billed Charge"] ups_charge_type = ups_rate_dic["Charge Type"] fedex_rate = fedex_rate_dic["Billed Charge"] fedex_charge_type = fedex_rate_dic["Charge Type"] sheet[ups_col_letter + str(i)] = ups_charge_type sheet[ups_rate_col_letter + str(i)] = ups_rate sheet[fedex_col_letter + str(i)] = fedex_charge_type sheet[fedex_rate_col_letter + str(i)] = fedex_rate i += 1 sheet[ups_col_letter + str(i)] = "TOTAL" sheet[ups_rate_col_letter + str(i)] = total_ups_rate sheet[fedex_col_letter + str(i)] = "TOTAL" sheet[fedex_rate_col_letter + str(i)] = total_fedex_rate diff_word_in = fedex_rate_col_in + 1 diff_word_col_letter = alphabet_list[diff_word_in] diff_rate_in = fedex_rate_col_in + 2 diff_rate_col_letter = alphabet_list[diff_rate_in] sheet[diff_word_col_letter + str(i)] = "Difference" sheet[diff_rate_col_letter + str(i)] = total_ups_rate - total_fedex_rate i += 1 # print(ups_data_list) # for ups_data_index in ups_data_list: # ups_charges_list = ups_data_list[ups_data_index]["Charges List"] # fedex_charges_list =fedex_data_list[ups_data_index]["Chares List"] # #UPS & Fedex should have the same keys # print(ups_charges_list) # print(fedex_charges_list) save_filename = ups_invoice_date + ' ups_and_fedex_data ' + earned + '_earned.xlsx' wb.save(save_filename) ffile.dir_back()