Exemplo n.º 1
0
def make_excel_file(total_data_dic, filename, folder, change_sheet_func=None):
    """
    Gets a dic of lists of lists of data to output into cells.
    Each lists will give a new row, and each element in the lists
        will create a new cell in the next column.
    The key of the dic will be the sheetname, while the prop will
        be first level of lists.
    :param data_list: dictionary of lists of lists of data
    :param filename: string of the excel filename
    :param folder: string of where the folder will be
    :return: None
    """
    ffile.move_dir(folder)

    wb = openpyxl.Workbook()

    for sheetname, excel_data_lists in total_data_dic.items():
        sheet = wb.create_sheet(title=sheetname)
        for y, row_data_lists in enumerate(excel_data_lists):
            rowNum = y + 1
            for x, data in enumerate(row_data_lists):
                columnNum = x + 1
                sheet.cell(row=rowNum, column=columnNum).value = data
        if change_sheet_func != None:
            change_sheet_func(sheet)

    wb.remove_sheet(wb.get_sheet_by_name('Sheet'))

    wb.save(filename)

    ffile.dir_back()
Exemplo n.º 2
0
def output_conv_ups_data(ups_data):
    # outputs ups data into excel file
    ffile.move_dir("excel")

    wb = openpyxl.Workbook()
    sheet = wb.active
    i = 1

    for date, data_dic in ups_data.items():
        sheet['A' + str(i)] = date
        i += 1
        for tracking_num, data_obj in data_dic.items():
            sheet['A' + str(i)] = tracking_num
            i += 1

            simple_data_list = data_obj.get_simple_datalist_str()
            for s in simple_data_list:
                sheet['A' + str(i)] = s
                i += 1

            sheet['A' + str(i)] = "detail"
            i += 1
            detail_data_list = data_obj.get_detail_datalist_str()
            for d in detail_data_list:
                sheet['A' + str(i)] = d
                i += 1
    wb.save('ups_data.xlsx')

    ffile.dir_back()
Exemplo n.º 3
0
def process_pdfs():
    index_dic = get_pdf_index_dic()
    print("INDEX", index_dic)

    for pdf_name, rcv_name_list in index_dic.items():

        ffile.move_dir('unread_pdf')

        msg = "Processing " + pdf_name
        print(msg)

        
        pdf_filename = pdf_name + '.pdf'

        print(pdf_filename)

        try:
            pdfFile = open(pdf_filename, 'rb')
        except FileNotFoundError:
            print("FILE NOT FOUND " + pdf_filename)
            print(os.getcwd())
            continue

        pdfReader = PyPDF2.PdfFileReader(pdfFile)

        ffile.dir_back()


        ffile.move_dir('proc_pdf/')

        pdf_pages = pdfReader.numPages

        num_pdf_names = len(rcv_name_list)
        if pdf_pages != num_pdf_names:
            raise Exception('num_pages != pdf_pages of file ' + pdf_filename)

        prev_rcv_filename = ""
        prev_pdfwriter = PyPDF2.PdfFileWriter()


        for i, rcv_name in enumerate(rcv_name_list):
            page = i + 1
            if rcv_name != "-":
                if page != 1:
                    # print(page, "TEST",prev_pdfwriter, prev_rcv_filename)
                    #Write previous RCV PDF
                    make_pdf_file(prev_pdfwriter, prev_rcv_filename)

                prev_rcv_filename = 'RCV' + rcv_name
                prev_pdfwriter = PyPDF2.PdfFileWriter()


            pageObj = pdfReader.getPage(page - 1)
            prev_pdfwriter.addPage(pageObj)
        
        make_pdf_file(prev_pdfwriter, prev_rcv_filename)

        ffile.dir_back()
Exemplo n.º 4
0
def convert_file(filename, folder_name, new_filename ='fedex_rates.xlsx', annual_charge = 0.00):
	""" 
	Opens an excel file containing standard base Fedex rates and will
		convert it to the appropriate values based on the discount
		and the absolute minimum. Will change the excel file itself.
	The excel filel will have first row containing zone information,
		and the first column containing weights.
	"""

	weight_column = 'A'

	ffile.move_dir(folder_name)

	wb = openpyxl.load_workbook(filename)

	sheet_names_list = wb.get_sheet_names()

	for sheet_name in sheet_names_list:
		sheet = wb.get_sheet_by_name(sheet_name)

		calc_func = match_sheet_names(sheet_name)

		max_rows = sheet.max_row
		max_columns = sheet.max_column
		col_letters_list = column_letters[:max_columns]

		# Indexes the zone that each column corresponds to (by index)
		column_index = {"weight": weight_column,}
		for letter in col_letters_list:
			zone_value = sheet[letter + '1'].value
			if letter == 'A':
				continue
			zone_dic = find_zones(zone_value)
			process_column_zone_index(column_index, zone_dic, letter)

		for row_num in range(2, max_rows + 1):
			row = str(row_num)

			weight = sheet[weight_column + row].value
			for zone, column_letter in column_index.items():
				if zone != "weight":
					cell_loc = column_letter + row
					rate = sheet[cell_loc].value

					#blank rates are returned back
					if rate == '-':
						new_rate = rate
					else:
						new_rate = process_rate(calc_func, zone, weight, rate, annual_charge)
					# print(rate, new_rate)

					sheet[cell_loc] = new_rate
				else:
					# print("weight: " + str(sheet[column_letter + row].value))
					continue

	wb.save(new_filename)
	ffile.dir_back()
Exemplo n.º 5
0
def open_data_excel():
    ffile.move_dir("excel_data")

    mypath = '.'
    cust_id_letter = 'G'
    cbm_col_letter = 'O'
    container_id_letter = 'P'

    files_list = [f for f in os.listdir(mypath) if isfile(join(mypath, f))]

    total_data_dic = {}

    for filename in files_list:
        filename_encoded = filename.encode('utf8')
        wb = openpyxl.load_workbook(filename)
        sheetname_list = wb.get_sheet_names()

        file_data_dic = total_data_dic[filename_encoded] = {}

        for sheetname in sheetname_list:
            sheet = wb.get_sheet_by_name(sheetname)
            sheetname_encoded = sheetname.encode('utf8')
            data_dic = {}
            file_data_dic[sheetname_encoded] = data_dic
            max_row = sheet.max_row

            prev_pallet_id = ""
            for row in range(2, max_row + 1):
                row_str = str(row)

                cust_id = sheet[cust_id_letter + row_str].value
                if cust_id == None:
                    continue

                if cust_id not in data_dic:
                    data_dic[cust_id] = []

                try:
                    cbm = float(sheet[cbm_col_letter + row_str].value)
                except (ValueError, TypeError):
                    cbm = None
                pallet_id = sheet[pallet_id_letter + row_str].value

                if pallet_id == None:
                    pallet_id = prev_pallet_id

                data_dic[cust_id].append({"cbm": cbm, "pallet_id": pallet_id})

                prev_pallet_id = pallet_id
    ffile.dir_back()

    return total_data_dic
Exemplo n.º 6
0
def process_excel_fedex(file_name="fedex_rates.xlsx"):
    ffile.move_dir("data")

    wb = openpyxl.load_workbook(file_name)
    sheetnames_list = wb.get_sheet_names()

    rate_dic = {}

    for delivery_name, para_list in fedex_rate_proc_index.items():
        sheet = wb.get_sheet_by_name(delivery_name)
        rates = proc_sheet_for_rates(sheet, *para_list)
        # print(delivery_name)
        # print(rates)
        rate_dic[delivery_name] = rates

    # sheet = wb.get_sheet_by_name('Standard Overnight')
    # rates = proc_sheet_for_rates(sheet)
    # print(rates)

    ffile.dir_back()

    return rate_dic
Exemplo n.º 7
0
def output_raw_data(raw_ups_data):
    # outputs raw ups data into excel file
    ffile.move_dir("excel")

    wb = openpyxl.Workbook()
    sheet = wb.active
    i = 1

    for tracking_num, data_types in raw_ups_data.items():
        sheet['A' + str(i)] = tracking_num
        i += 1
        for data_type, data_list in data_types.items():
            sheet['A' + str(i)] = data_type
            i += 1
            for data in data_list:
                sheet['A' + str(i)] = str(data)
                # for j, key in enumerate(data):
                # 	sheet[alphabet_list[j * 2] + str(i)] = key
                # 	sheet[alphabet_list[j * 2 + 1] + str(i)] = str(data[key])
                i += 1
    wb.save('ups_raw_data.xlsx')

    ffile.dir_back()
Exemplo n.º 8
0
def process_excel_fedex(earned_num=0):
    """
    Input: earned_num(float), signifying the earned discount obtained.
    Output: rate_dic  -> [ sheet/ delivery name ] [ weight(dic) ][zone] = rate(float) 
    """
    ffile.move_dir("fedex_rates")

    earned_string = ''

    if earned_num == 0:
        earned_string = 'no'
    elif earned_num == 1:
        earned_string = '1st'
    elif earned_num == 2:
        earned_string = '2nd'
    elif earned_num == 3:
        earned_string = '3rd'
    elif earned_num == 4:
        earned_string = '4th'
    elif earned_num == 5:
        earned_string = '5th'

    rate_dic = {}

    fedex_rate_excel_filename = "fedex_discounted_rates_" + earned_string + "_earned.xlsx"

    wb = openpyxl.load_workbook(fedex_rate_excel_filename)
    sheet_names_list = wb.get_sheet_names()

    for sheet_name in sheet_names_list:
        sheet = wb.get_sheet_by_name(sheet_name)
        rates = proc_sheet_for_rates(sheet)
        rate_dic[sheet_name] = rates

    ffile.dir_back()

    return rate_dic
Exemplo n.º 9
0
def convert_file(filename, folder_name, new_filename, annual_charge):
    ffile.move_dir(folder_name)

    wb = openpyxl.load_workbook(filename)
    sheet_names = wb.get_sheet_names()

    for sheet_name in sheet_names:
        sheet = wb.get_sheet_by_name(sheet_name)
        zones_dic = excel_helper.get_zones(sheet)

        calc_func = get_calc_func(sheet_name)

        # print(zone_dic)

        num_rows = sheet.max_row
        weight_column = excel_helper.weight_column

        for row_num in range(int(excel_helper.zone_row) + 1, num_rows + 1):
            weight = sheet[weight_column + str(row_num)].value
            # print(weight)
            for column_letter, zone_dic in zones_dic.items():
                if column_letter == weight_column:
                    continue

                zone = zone_dic["start"]

                cell_loc = column_letter + str(row_num)

                # print(column_letter, zone_dic)
                full_rate = sheet[cell_loc].value
                # print(full_rate)
                new_rate = process_rate(calc_func, zone, weight, full_rate,
                                        annual_charge)
                sheet[cell_loc] = new_rate

    wb.save(new_filename)
    ffile.dir_back()
Exemplo n.º 10
0
def read_simple_ups(simple_ups_filename, folder_name):
    fieldnames_index = {}

    ffile.move_dir(folder_name)

    total_simple_ups_data = {}

    def filter_data(simple_ups):
        # Input: detail_ups_dic
        # Return True if this detail_ups_dic should be added.
        # False otherwise.
        tracking_num = simple_ups["tracking_num"]
        if tracking_num == "":
            return False
        else:
            return True

    def get_fieldnames(row):
        fieldnames_dic = {}
        for infile_name, file_fieldname in simple_ups_fieldnames.items():
            fieldnames_dic[infile_name] = row.index(file_fieldname)
        return fieldnames_dic

    def extract_data(row):
        ups_simple_dic = {}
        for fieldname, column in fieldnames_index.items():
            if fieldname == "billed_charge":
                ups_simple_dic[
                    fieldname] = excel_helper.convert_charge_string_to_float(
                        row[column])
            else:
                ups_simple_dic[fieldname] = row[column]
        return ups_simple_dic

    with open(simple_ups_filename) as f_simple:
        reader = csv.reader(f_simple)

        # find row with fieldnames & set fieldnames_index
        for row in reader:
            try:
                if "Account Number" in row:
                    fieldnames_index = get_fieldnames(row)
                    break
            except IndexError:
                pass

        tracking_num_column = fieldnames_index["tracking_num"]

        for row in reader:
            tracking_num = row[tracking_num_column]
            # print(tracking_num, simple_ups_filename)
            simple_ups_data = extract_data(row)
            if not filter_data(simple_ups_data):
                continue
            if tracking_num not in total_simple_ups_data:
                total_simple_ups_data[tracking_num] = [
                    simple_ups_data,
                ]
            else:
                total_simple_ups_data[tracking_num].append(simple_ups_data)

    ffile.dir_back()

    return total_simple_ups_data
Exemplo n.º 11
0
def read_detail_ups(detail_ups_filename, folder_name):
    total_detail_ups_data = {}

    fieldnames_index = {}
    prev_track_num = ""

    def extract_data_from_row(row):
        # Extracts data from the row of csv file using
        # fieldnames_index dic which gives which and what columns to extract
        ups_detail_dic = {}
        for fieldname, column_num in fieldnames_index.items():
            if fieldname == "billed_charge":
                ups_detail_dic[
                    fieldname] = excel_helper.convert_charge_string_to_float(
                        row[column_num])
            else:
                ups_detail_dic[fieldname] = row[column_num]
        return ups_detail_dic

    def filter_data(detail_ups):
        # Input: detail_ups_dic
        # Return True if this detail_ups_dic should be added.
        # False otherwise.
        tracking_num = detail_ups["tracking_num"]
        billed_charge = detail_ups["billed_charge"]
        if tracking_num == "":
            return False
        elif billed_charge == 0:
            return False
        else:
            return True

    ffile.move_dir(folder_name)

    fieldnames_index = get_detail_fieldnames_index()

    with open(detail_ups_filename) as f_detail:
        reader = csv.reader(f_detail)

        for row in reader:
            detail_ups_dic = extract_data_from_row(row)

            tracking_num = detail_ups_dic["tracking_num"]

            # skip those without tracking number or with 0 billed charge
            if not filter_data(detail_ups_dic):
                continue

            if tracking_num not in total_detail_ups_data:
                total_detail_ups_data[tracking_num] = [[
                    detail_ups_dic,
                ]]
            else:
                d_list = total_detail_ups_data[tracking_num]
                if prev_track_num == tracking_num:
                    # print(d_list[len(d_list) -1])
                    last_ups_detail_list = d_list[len(d_list) - 1]
                    last_ups_detail_list.append(detail_ups_dic)
                else:
                    d_list.append([
                        detail_ups_dic,
                    ])
            # print(tracking_num, prev_track_num, tracking_num == prev_track_num, tracking_num not in total_detail_ups_data)
            prev_track_num = tracking_num

            # print(detail_ups_dic)
    # print(fieldnames_index)
    ffile.dir_back()

    # print(total_detail_ups_data['1Z0019850396816706'])

    return total_detail_ups_data
Exemplo n.º 12
0
def get_pdf_index_dic():
    index_dic = {}

    ffile.move_dir("unread_pdf")

    try:
        wb = openpyxl.load_workbook('index.xlsx')
    except FileNotFoundError:
        print("index.xlsx was not found")
        return 0

    sheet_list = wb.get_sheet_names()
    sheetname = sheet_list[0]
    sheet = wb.get_sheet_by_name(sheetname)

    max_columns = sheet.max_column

    cur_year = ''
    cur_month = ''

    repeat_page_symbol = '-'


    for col in range(1, max_columns + 1, 4):
        pdf_name = str(sheet.cell(row=1, column=col).value)
        index_dic[pdf_name] = []

        num_pages = sheet.cell(row=2, column=col).value
        if num_pages == None:
            msg = "No num of pages " + pdf_name + " in column " + str(col)
            raise Exception(msg)

        pages_col_num = col + 1
        year_col = col + 1
        month_col = col + 2
        day_rcv_col = col + 3

        year = ''
        month = ''
        day_rcv = ''

        for page in range(1, num_pages + 1):
            year_cell = sheet.cell(row = page, column=year_col).value

            if year_cell != None:
                if year_cell < 10:
                    year = "0" + str(year_cell)
                else:
                    year = str(year_cell)

            month_cell = sheet.cell(row = page, column=month_col).value

            if month_cell != None:
                if month_cell < 10:
                    month = "0" + str(month_cell)
                else:
                    month = str(month_cell)

            day_rcv_cell = sheet.cell(row = page, column=day_rcv_col).value

            day_rcv = str(day_rcv_cell)

            if day_rcv == repeat_page_symbol:
                rcv_name = day_rcv
            else:
                rcv_name = year + month + day_rcv
            index_dic[pdf_name].append(rcv_name)

    ffile.dir_back()

    return index_dic
Exemplo n.º 13
0
    from PIL import Image

import pytesseract
import re
import os, time, os.path


def get_img_string(filename):
    if not filename.endswith('.jpg'):
        msg = filename + " is not a jpg file."
        raise Exception(msg)
    img_string = pytesseract.image_to_string(Image.open(filename))
    return img_string


ffile.move_dir("jpg_raw")

file_list = os.listdir()

for filename in file_list:

    img_string = get_img_string(filename)

    pattern = re.compile("RCV\d+-\d+")

    re_result = pattern.search(img_string)

    if re_result != None:
        new_filename = ''
        for i in range(0, 100):
            new_filename = re_result[0] + '_' + str(i) + '.jpg'
Exemplo n.º 14
0
def output_fedex_ups_dat(fedex_list_inst, ups_invoice_date, earned):
    dates_list = fedex_list_inst.get_dates()

    ffile.move_dir("excel")

    wb = openpyxl.Workbook()
    sheet = wb.active
    i = 1

    sheet.freeze_panes = 'A2'

    UPS_column_width = 30
    tracking_num_width = 20
    fedex_column_width = 30
    sheet.column_dimensions['D'].width = UPS_column_width
    sheet.column_dimensions['E'].width = tracking_num_width
    sheet.column_dimensions['F'].width = fedex_column_width

    header_list = [
        "Date", "Zone", "Weight", "UPS", "Track Num/Rate", "Fedex", "FX Rate"
    ]

    date_col_in = header_list.index("Date")
    date_col_letter = alphabet_list[date_col_in]

    zone_col_in = header_list.index("Zone")
    zone_col_letter = alphabet_list[zone_col_in]

    weight_col_in = header_list.index("Weight")
    weight_col_letter = alphabet_list[weight_col_in]

    ups_col_in = header_list.index("UPS")
    ups_col_letter = alphabet_list[ups_col_in]

    ups_rate_col_in = header_list.index("Track Num/Rate")
    ups_rate_col_letter = alphabet_list[ups_rate_col_in]

    fedex_col_in = header_list.index("Fedex")
    fedex_col_letter = alphabet_list[fedex_col_in]

    fedex_rate_col_in = header_list.index("FX Rate")
    fedex_rate_col_letter = alphabet_list[fedex_rate_col_in]

    for column_num, header in enumerate(header_list):
        column_letter = alphabet_list[column_num]
        sheet[column_letter + str(i)] = header
    i += 1

    for date in dates_list:
        tracking_num_list = fedex_list_inst.get_ups_tracking_nums(date)
        for tracking_num in tracking_num_list:
            data_dic = fedex_list_inst.get_first_dataset(date, tracking_num)
            zone = data_dic["Zone"]
            date = data_dic["Date"]
            weight = data_dic["Weight"]

            sheet[date_col_letter + str(i)] = date
            sheet[zone_col_letter + str(i)] = zone
            sheet[weight_col_letter + str(i)] = weight
            sheet[ups_col_letter + str(i)] = "UPS"
            sheet[ups_rate_col_letter + str(i)] = tracking_num
            sheet[fedex_col_letter + str(i)] = "Fedex"

            i += 1

            ups_data_dic = data_dic["ups"]
            fedex_data_dic = data_dic["fedex"]

            total_ups_rate = ups_data_dic["Total Charge"]
            total_fedex_rate = fedex_data_dic["Total Charge"]

            ups_data_list = ups_data_dic['Charges List']
            fedex_data_list = fedex_data_dic['Charges List']

            for list_index, ups_rate_dic in enumerate(ups_data_list):
                # print(ups_data_list, len(ups_data_list))
                # print(fedex_data_list, len(fedex_data_list))
                fedex_rate_dic = fedex_data_list[list_index]

                ups_rate = ups_rate_dic["Billed Charge"]
                ups_charge_type = ups_rate_dic["Charge Type"]

                fedex_rate = fedex_rate_dic["Billed Charge"]
                fedex_charge_type = fedex_rate_dic["Charge Type"]

                sheet[ups_col_letter + str(i)] = ups_charge_type
                sheet[ups_rate_col_letter + str(i)] = ups_rate
                sheet[fedex_col_letter + str(i)] = fedex_charge_type
                sheet[fedex_rate_col_letter + str(i)] = fedex_rate
                i += 1

            sheet[ups_col_letter + str(i)] = "TOTAL"
            sheet[ups_rate_col_letter + str(i)] = total_ups_rate
            sheet[fedex_col_letter + str(i)] = "TOTAL"
            sheet[fedex_rate_col_letter + str(i)] = total_fedex_rate

            diff_word_in = fedex_rate_col_in + 1
            diff_word_col_letter = alphabet_list[diff_word_in]
            diff_rate_in = fedex_rate_col_in + 2
            diff_rate_col_letter = alphabet_list[diff_rate_in]

            sheet[diff_word_col_letter + str(i)] = "Difference"
            sheet[diff_rate_col_letter +
                  str(i)] = total_ups_rate - total_fedex_rate

            i += 1

            # print(ups_data_list)
            # for ups_data_index in ups_data_list:
            # 	ups_charges_list = ups_data_list[ups_data_index]["Charges List"]
            # 	fedex_charges_list =fedex_data_list[ups_data_index]["Chares List"]
            # 	#UPS & Fedex should have the same keys
            # 	print(ups_charges_list)
            # 	print(fedex_charges_list)

    save_filename = ups_invoice_date + ' ups_and_fedex_data ' + earned + '_earned.xlsx'

    wb.save(save_filename)

    ffile.dir_back()