Пример #1
0
    def convert(self, jsonFileName):
        """Convert a JSON file into an Excel workbook.

        The workbook receives the sheets 'HSN', 'B2CS', 'B2B' and
        'Error Report', each filled by the matching writer method, and is
        saved under the input path with its extension replaced by '.xlsx'.

        :param jsonFileName: path of the JSON file to convert.
        :return: None. Problems are reported through ``self.log``.
        """
        import os  # local import: only the path helpers are needed here

        # Swap only the extension. str.replace() would also rewrite a
        # '.json' occurring inside a directory name, and would leave the path
        # untouched (so the input itself got overwritten) for other suffixes.
        outputFileName = os.path.splitext(jsonFileName)[0] + '.xlsx'

        # Check input file exists in file system
        self.log.info('Checking input file exists or not')
        if not is_file_exists(jsonFileName):
            self.log.error("Input file '%s' does not exist" % jsonFileName)
            return

        # Load JSON data from file
        self.log.info("Loading JSON data from '%s'" % jsonFileName)
        json_obj = self.loadJsonData(jsonFileName)
        if not json_obj:
            self.log.error('Unable to read JSON data from file')
            return

        self.print_gstin(json_obj)

        # Open Excel file and create required work sheets
        workbook = openpyxl.Workbook()
        hsnWorksheet = workbook.create_sheet('HSN')
        b2csWorksheet = workbook.create_sheet('B2CS')
        b2bWorksheet = workbook.create_sheet('B2B')
        errWorksheet = workbook.create_sheet('Error Report')

        try:
            self.writeHsnData(json_obj, hsnWorksheet)
            self.writeB2CSData(json_obj, b2csWorksheet)
            self.write_b2b_data(json_obj, b2bWorksheet)
            self.writeErrData(json_obj, errWorksheet)
        except Exception:
            # Best effort: the failure is logged and whatever was written so
            # far is still saved below.
            self.log.error('Exception during file parsing -> %s' %
                           traceback.format_exc())

        workbook.save(outputFileName)
        self.log.info("Output saved in file '%s'" % outputFileName)
        self.log.raw_line(':::::::::: Done ::::::::::')
Пример #2
0
    def convert(self, input_file_name):
        """Convert a fixed-width text report into a sheet of an Excel workbook.

        The input file name (without extension) must be 8 characters long in
        ``XXYY9999`` form: characters 0-1 are the company prefix, 2-3 the
        data type (used as the sheet name, with the current date appended
        for 'PR'), 4-5 the month number and 6-7 the year number. The output
        workbook is created in the input file's directory, or re-opened when
        it already exists, in which case a sheet with the same name is
        replaced.

        The first non-blank line of the text file is the header: the start
        and end offsets of its space-separated words define the column
        boundaries used to slice every following line. The header line
        itself is not written to the sheet.

        :param input_file_name: path of the text file to convert.
        :return: None. Problems are reported through ``self.log``.
        """
        import os  # local import: only the path helpers are needed here

        self.log.info("Processing file '%s'" % input_file_name)
        output_dir, base_name = os.path.split(input_file_name)
        file_name = base_name.split('.')[0]

        # Validate the name up front; a wrong length or a non-numeric
        # month/year cannot be turned into an output file name.
        month_num = yr_num = None
        if len(file_name) == 8:
            try:
                month_num = int(file_name[4:6])
                yr_num = int(file_name[6:8])
            except ValueError:
                month_num = yr_num = None

        if month_num is None:
            self.log.error(
                "Invalid file name format '%s'. Use XXYY9999 format" %
                file_name)
            self.log.raw_line(':::::::::: Done ::::::::::')
            return

        comp_str = file_name[0:2]
        data_type_str = file_name[2:4]
        # os.path.join keeps a bare file name relative instead of producing
        # '/<name>.xlsx' in the filesystem root.
        # NOTE(review): getMonthStr(month_num, 3) presumably returns a
        # 3-letter month abbreviation - confirm against commonLib.
        output_file_name = os.path.join(
            output_dir,
            '%s_%s%d.xlsx' % (comp_str,
                              commonLib.getMonthStr(month_num, 3),
                              yr_num))

        # Append processing date only for 'PR' worksheet
        if data_type_str == "PR":
            data_type_str += "_" + get_curr_date("%d%b%y")

        if not is_file_exists(output_file_name):
            workbook = openpyxl.Workbook()
            workbook.guess_types = True
            worksheet = workbook.create_sheet(data_type_str)
        else:
            workbook = openpyxl.load_workbook(output_file_name)
            worksheet_list = workbook.get_sheet_names()

            # Drop the default sheet and any previous sheet of this type so
            # the sheet is rebuilt from scratch.
            if 'Sheet' in worksheet_list:
                worksheet = workbook.get_sheet_by_name('Sheet')
                workbook.remove_sheet(worksheet)

            if data_type_str in worksheet_list:
                worksheet = workbook.get_sheet_by_name(data_type_str)
                workbook.remove_sheet(worksheet)

            worksheet = workbook.create_sheet(data_type_str)

        # Maps each column's start offset to its end offset; None marks a
        # column that runs to the end of the line.
        col_end_by_start = None
        sorted_starts = []
        try:
            with open(input_file_name, 'r') as file_ptr:
                for txt_file_line in file_ptr:
                    if txt_file_line.strip() == '':
                        continue

                    if col_end_by_start is None:
                        # Header line: record where every word starts/ends.
                        col_end_by_start = dict()
                        in_word = False
                        word_start = -1
                        for char_index, char in enumerate(txt_file_line):
                            if char == ' ' and in_word:
                                col_end_by_start[word_start] = char_index
                                in_word = False
                            elif char != ' ' and not in_word:
                                in_word = True
                                word_start = char_index
                                col_end_by_start[char_index] = None
                        sorted_starts = sorted(col_end_by_start)
                        continue

                    line_len = len(txt_file_line)
                    xl_data_row = list()
                    for field_index, start_index in enumerate(sorted_starts):
                        if line_len < start_index:
                            continue

                        # An open-ended column is sliced to the end of the
                        # line and strip() removes the newline; slicing with
                        # -1 would eat a real character on a last line that
                        # has no trailing newline.
                        data = txt_file_line[
                            start_index:col_end_by_start[start_index]].strip()
                        if field_index == 3:
                            # Integer-like values in the 4th column are
                            # wrapped in a CONCATENATE formula (presumably
                            # to keep Excel from treating them as numbers);
                            # anything else stays as plain text.
                            try:
                                data = "=CONCATENATE(\"%s\")" % int(data)
                            except ValueError:
                                pass
                        else:
                            try:
                                data = float(data)
                            except ValueError:
                                pass

                        xl_data_row.append(data)

                    worksheet.append(xl_data_row)

            workbook.save(output_file_name)
            # Only claim success once the workbook was actually written.
            self.log.info("Output saved in file '%s'" % output_file_name)
        except Exception:
            self.log.error('Exception during file parsing -> %s' %
                           traceback.format_exc())

        self.log.raw_line(':::::::::: Done ::::::::::')
Пример #3
0
    def process(self, input_file_name):
        """Mail the flagged rows of the 'PR' sheet of an Excel workbook.

        The base file name is split on '_': the first part selects the
        company entry in ``data/company.yaml`` and the first two parts are
        used in the mail subject. In the 'PR' sheet, rows up to the one whose
        column D reads 'as per last Down' are skipped. After that, rows whose
        column O is 'ITC-NA' or whose column Q is 'Available in GSTR-2 Only'
        are collected through ``self.insert_data``. Scanning stops after 5
        consecutive blank rows. One HTML mail per collected supplier entry is
        sent to the company's mail address.

        :param input_file_name: path of the workbook to process.
        :return: None. Problems are reported through ``self.log``.
        """
        if not is_file_exists(input_file_name, logger=self.log):
            return

        self.log.info("Processing file %s" % input_file_name)
        input_workbook = openpyxl.load_workbook(input_file_name)
        file_name = os.path.basename(input_file_name).split('.')[0].split('_')
        if len(file_name) < 2:
            self.log.error(
                "Exiting: File name '%s' must contain at least two '_' "
                "separated parts" % os.path.basename(input_file_name))
            return

        comp_initial = file_name[0]
        company_file_data = "%s %s - " % (comp_initial, file_name[1])

        # NOTE(review): yaml.safe_load would be enough if this file only
        # holds plain mappings/strings - confirm before switching loaders.
        with open("data/company.yaml", "r") as fp:
            company = yaml.load(fp, Loader=yaml.FullLoader)["company"]

        if comp_initial not in company or company[comp_initial]["mail"] == "":
            self.log.error("Exiting: Mail id not defined for '%s'" %
                           comp_initial)
            return

        for input_sheet_name in input_workbook.get_sheet_names():
            if input_sheet_name != "PR":
                continue

            row_num = 0
            num_blank_rows = 0
            excel_data_dict = dict()
            excel_data_dict["itc_na"] = dict()
            excel_data_dict["gstr2_only"] = dict()
            process_sheet = False
            input_sheet = input_workbook.get_sheet_by_name(input_sheet_name)

            # Loop till finding first occurence of 'as per last Down' string
            while num_blank_rows < 5:
                row_num += 1
                data_row = [input_sheet[chr(col) + str(row_num)].value
                            for col in range(ord('A'), ord('Q'))]

                if is_row_blank(data_row):
                    num_blank_rows += 1
                    continue

                num_blank_rows = 0
                if str(input_sheet['D%s' % row_num].value) \
                        == 'as per last Down':
                    process_sheet = True
                    break

            if not process_sheet:
                self.log.warning("Skipping sheet '%s'" % input_sheet_name)
                continue

            self.log.info("Processing sheet '%s'" % input_sheet_name)
            num_blank_rows = 0
            report_date = "NA.NA.NA.NA"
            gst_portal_line_found = False
            while num_blank_rows < 5:
                row_num += 1
                # Columns A..Q, i.e. indexes 0..16
                data_row = [input_sheet[chr(col) + str(row_num)].value
                            for col in range(ord('A'), ord('R'))]

                if is_row_blank(data_row):
                    num_blank_rows += 1
                    continue

                # Reset num_blank_lines to continue reading excel sheet
                num_blank_rows = 0
                row_len = len(data_row)
                if row_len > 14 and data_row[14] == 'ITC-NA':
                    self.insert_data("itc_na", data_row, excel_data_dict)
                elif row_len > 16 and data_row[16] \
                        == 'Available in GSTR-2 Only':
                    # Index 16 needs more than 16 elements, not 16 or more
                    self.insert_data("gstr2_only", data_row, excel_data_dict)

                # Fetching date for GST_PORTAL data: it is read from column D
                # of the row that follows a 'from GST Portal' row.
                if gst_portal_line_found:
                    if isinstance(data_row[3], str):
                        report_date = data_row[3]
                    else:
                        # A non-text cell cannot be split on '.' below
                        self.log.warning(
                            "Ignoring non-text report date in row %s"
                            % row_num)

                gst_portal_line_found = data_row[6] == "from GST Portal"

            # Drop the first dot-separated part and join the rest with '/'
            report_date = "/".join(report_date.split(".")[1:])
            mail_count = 0
            for r_type, excel_data in excel_data_dict.items():
                m_data = mail_info[r_type]
                comp_name = \
                    company[comp_initial]["name"] \
                    + " GST # %s" % company[comp_initial]["gst"]
                curr_html_header = \
                    mail_info["html_header"] % (m_data["heading"] % comp_name,
                                                m_data["message"])
                html_footer_comp_name = ""
                if r_type == "itc_na":
                    html_footer_comp_name = comp_name
                curr_html_footer = \
                    mail_info["html_footer"] % (html_footer_comp_name,
                                                report_date)
                total = 0
                smtp_session = None
                try:
                    # Connect only when there is something to send
                    if excel_data:
                        smtp_session = self._open_smtp_session(
                            "Connecting to server...")

                    for supp_name, supp_data in excel_data.items():
                        total += len(supp_data["rows"])
                        self.log.info("Sending mail for %s..." % supp_name)
                        msg = MIMEMultipart('alternative')
                        msg['From'] = Smtp.user_name
                        msg['To'] = company[comp_initial]["mail"]
                        msg['Subject'] = "%s %s::%s - %s" \
                                         % (company_file_data,
                                            supp_name,
                                            supp_data["tin"],
                                            m_data["subject"])
                        total_igst = 0
                        total_cgst = 0
                        total_sgst = 0
                        mail_data = ""
                        for row in supp_data["rows"]:
                            # Unwrap '=CONCATENATE("...")'. Only the exact
                            # '")' suffix is removed; rstrip('")') would also
                            # eat quotes/parentheses that belong to the value.
                            first_cell = row[0].replace('=CONCATENATE("', '')
                            if first_cell.endswith('")'):
                                first_cell = first_cell[:-2]
                            row[0] = first_cell
                            mail_data += \
                                mail_info["table_tr_format"] % tuple(row)
                            total_igst += float(row[4])
                            total_cgst += float(row[5])
                            total_sgst += float(row[6])

                        if len(supp_data["rows"]) > 1:
                            mail_data += mail_info["table_total_tr_format"] \
                                         % (total_igst, total_cgst, total_sgst)

                        data_payload = MIMEText(
                            curr_html_header + mail_data + curr_html_footer,
                            'html')
                        # A multipart message takes its parts via attach()
                        msg.attach(data_payload)
                        smtp_session.sendmail(Smtp.user_name,
                                              company[comp_initial]["mail"],
                                              msg.as_string())
                        mail_count += 1

                        # Reconnect server for every 20 mails to avoid SPAM
                        # warning
                        if (mail_count % 20) == 0:
                            smtp_session.quit()
                            # Forget the closed session so a failed reconnect
                            # does not trigger a second quit() below.
                            smtp_session = None
                            smtp_session = self._open_smtp_session(
                                "Re-connecting to server...")
                finally:
                    # Exit smtp server, also when sending failed midway
                    if smtp_session is not None:
                        smtp_session.quit()

                self.log.info("Total records for '%s': %s" % (r_type, total))

            self.log.info("Total emails send: %s" % mail_count)

    def _open_smtp_session(self, log_message):
        """Log ``log_message``, then return a logged-in STARTTLS SMTP session.

        Server, port and credentials are taken from ``Smtp``. The socket is
        closed again if the handshake or the login fails.
        """
        self.log.info(log_message)
        smtp_session = smtplib.SMTP(Smtp.server, Smtp.port)
        try:
            smtp_session.ehlo()
            smtp_session.starttls()
            smtp_session.ehlo()
            smtp_session.login(Smtp.user_name, Smtp.password)
        except Exception:
            smtp_session.close()
            raise
        self.log.info("Login successful")
        return smtp_session
Пример #4
0
    def convert(self, input_file_name):
        """Extract the sheets listed in ``Gstr2bHeaders`` into a parsed workbook.

        For every key of ``Gstr2bHeaders.input_header`` ('CDNR' and 'CDNRA'
        are looked up as 'B2B-CDNR' / 'B2B-CDNRA') that exists in the input
        workbook, the rows are extracted and written to a sheet of the same
        name in ``<input name>_parsed.xlsx``, located in the input file's
        directory. Rows of every sheet other than 'B2B' are additionally
        reshaped and appended to the 'B2B' output sheet.

        :param input_file_name: path of the workbook to parse.
        :return: None. Problems are reported through ``self.log``.
        """
        import os  # local import: only the path helpers are needed here

        self.log.info("Processing file '%s'" % input_file_name)
        file_name = os.path.basename(input_file_name).split('.')[0]
        # os.path.join keeps a bare file name relative instead of producing
        # '/<name>_parsed.xlsx' in the filesystem root.
        output_file_name = os.path.join(os.path.dirname(input_file_name),
                                        '%s_parsed.xlsx' % file_name)

        # First '_' separated part of the name with its first two characters
        # moved to the end.
        name_prefix = file_name.split('_')[0]
        gst_2yrm_val = name_prefix[2:] + name_prefix[0:2]
        xl_class = Gstr2bHeaders
        try:
            # Load input / output workbooks
            input_workbook = openpyxl.load_workbook(input_file_name)
            if not is_file_exists(output_file_name):
                output_workbook = openpyxl.Workbook()
                output_workbook.guess_types = True
            else:
                # Re-open the existing OUTPUT workbook (not the input one)
                output_workbook = openpyxl.load_workbook(output_file_name)

            input_workbook_sheets = input_workbook.sheetnames
            # Snapshot taken before any sheet is created below
            output_workbook_sheets = output_workbook.sheetnames

            for class_ref_name in xl_class.input_header.keys():
                input_sheet_name = class_ref_name
                if class_ref_name in ["CDNR", "CDNRA"]:
                    input_sheet_name = "B2B-" + class_ref_name

                if input_sheet_name not in input_workbook_sheets:
                    continue

                self.log.info('Processing sheet %s' % input_sheet_name)
                if input_sheet_name in output_workbook_sheets:
                    self.log.warning('Sheet %s already exists! Will recreate..'
                                     % input_sheet_name)
                    output_workbook.remove(output_workbook[input_sheet_name])
                input_sheet = input_workbook[input_sheet_name]

                # Start of data extraction logic
                rows_to_append = self.extract_data_from_excel_sheet(
                    class_ref_name, input_sheet,
                    xl_class.start_row[input_sheet_name], xl_class)

                # Create and append data to output sheet
                output_sheet = output_workbook.create_sheet(input_sheet_name)
                output_sheet.append(
                    xl_class.output_header[class_ref_name])
                for row in rows_to_append:
                    output_sheet.append(row)

                # Logic to append data to B2B sheet
                # NOTE(review): this needs a 'B2B' sheet to exist in the
                # output workbook already; otherwise the lookup raises and
                # nothing is saved - confirm 'B2B' is always processed first.
                if input_sheet_name != "B2B":
                    rows_to_append = self.shuffle_row_for_b2b_headers(
                        class_ref_name,
                        rows_to_append,
                        gst_2yrm_val,
                        xl_class)
                    output_sheet = output_workbook["B2B"]

                    for row in rows_to_append:
                        output_sheet.append(row)

            output_workbook.save(output_file_name)
            self.log.info("Output saved in file '%s'" % output_file_name)
        except Exception:
            self.log.error('Exception during file parsing -> %s'
                           % (traceback.format_exc()))

        self.log.raw_line(':::::::::: Done ::::::::::')