def convert(self, jsonFileName):
    """Convert a JSON file into an Excel workbook with one sheet per section.

    The workbook gets the sheets 'HSN', 'B2CS', 'B2B' and 'Error Report',
    each filled by the matching writer method of this object, and is saved
    under the input path with its extension replaced by '.xlsx'.

    Nothing is written when the input file does not exist or when no JSON
    data could be loaded from it. An exception raised by a writer is logged
    and whatever was written up to that point is still saved.

    :param jsonFileName: path of the JSON file to convert.
    :return: None.
    """
    import os  # local import so this block does not rely on file-level imports

    # Swap only the extension; str.replace() would also rewrite '.json'
    # occurring in directory names and would leave a non-'.json' name
    # unchanged, making the save below overwrite the input file.
    outputFileName = os.path.splitext(jsonFileName)[0] + '.xlsx'

    # Check input file exists in file system
    self.log.info('Checking input file exists or not')
    if not is_file_exists(jsonFileName):
        return

    # Load JSON data from file
    self.log.info("Loading JSON data from '%s'" % jsonFileName)
    json_obj = self.loadJsonData(jsonFileName)
    if not json_obj:
        self.log.error('Unable to read JSON data from file')
        return

    self.print_gstin(json_obj)

    # Open Excel file and create required work sheets
    workbook = openpyxl.Workbook()
    hsnWorksheet = workbook.create_sheet('HSN')
    b2csWorksheet = workbook.create_sheet('B2CS')
    b2bWorksheet = workbook.create_sheet('B2B')
    errWorksheet = workbook.create_sheet('Error Report')

    try:
        self.writeHsnData(json_obj, hsnWorksheet)
        self.writeB2CSData(json_obj, b2csWorksheet)
        self.write_b2b_data(json_obj, b2bWorksheet)
        self.writeErrData(json_obj, errWorksheet)
    except Exception:
        # Best effort: keep going so the sheets filled so far are saved.
        self.log.error('Exception during file parsing -> %s'
                       % traceback.format_exc())

    workbook.save(outputFileName)
    self.log.info("Output saved in file '%s'" % outputFileName)
    self.log.raw_line(':::::::::: Done ::::::::::')
def convert(self, input_file_name):
    """Load a fixed-width text file into a sheet of a per-month workbook.

    The input base name must be 8 characters in ``XXYY9999`` form: characters
    0-1 and the month/year digits (4-5, 6-7) build the output workbook name
    ``XX_<month><year>.xlsx`` in the input file's directory; characters 2-3
    name the target sheet ('PR' additionally gets the current date appended).
    An existing sheet of that name, and the default 'Sheet', are dropped from
    an existing workbook before the sheet is recreated.

    The first non-blank line of the text file is used only to locate the
    columns (each run of non-space characters starts a column) and is not
    written to the sheet. Every following non-blank line is sliced at those
    positions and appended as one row.

    :param input_file_name: path of the text file to convert.
    :return: None. Failures while parsing or saving are logged, not raised.
    """
    import os  # local import so this block does not rely on file-level imports

    def find_columns(layout_line):
        """Return (start, end) slice bounds for every word of layout_line.

        ``end`` is None for a word that runs to the end of the line.
        """
        spans = []
        word_start = None
        for pos, char in enumerate(layout_line):
            if char == ' ' and word_start is not None:
                spans.append((word_start, pos))
                word_start = None
            elif char != ' ' and word_start is None:
                word_start = pos
        if word_start is not None:
            spans.append((word_start, None))
        return spans

    def convert_cell(field_index, text):
        """Turn one stripped field into the value stored in the sheet."""
        try:
            if field_index != 3:
                return float(text)
            # Fourth column: whole numbers are written as a CONCATENATE
            # formula (presumably so Excel shows them as text - confirm).
            return "=CONCATENATE(\"%s\")" % int(text)
        except ValueError:
            # Not numeric: keep the text as it is.
            return text

    self.log.info("Processing file '%s'" % input_file_name)
    file_name = os.path.basename(input_file_name).split('.')[0]

    # Validate the XXYY9999 pattern up front, including the numeric part.
    month_and_year = None
    if len(file_name) == 8:
        try:
            month_and_year = (int(file_name[4:6]), int(file_name[6:8]))
        except ValueError:
            month_and_year = None
    if month_and_year is None:
        self.log.error(
            "Invalid file name format '%s'. Use XXYY9999 format" % file_name)
        self.log.raw_line(':::::::::: Done ::::::::::')
        return

    month_num, yr_num = month_and_year
    comp_str = file_name[0:2]
    data_type_str = file_name[2:4]
    # os.path keeps a bare file name relative instead of producing '/<name>'.
    output_file_name = os.path.join(
        os.path.dirname(input_file_name),
        '%s_%s%d.xlsx' % (comp_str, commonLib.getMonthStr(month_num, 3),
                          yr_num))

    # Append processing date only for 'PR' worksheet
    if data_type_str == "PR":
        data_type_str += "_" + get_curr_date("%d%b%y")

    if not is_file_exists(output_file_name):
        workbook = openpyxl.Workbook()
        workbook.guess_types = True
    else:
        workbook = openpyxl.load_workbook(output_file_name)
        existing_sheets = workbook.sheetnames
        # Drop the default sheet and any earlier version of the target sheet.
        for stale_name in ('Sheet', data_type_str):
            if stale_name in existing_sheets:
                workbook.remove(workbook[stale_name])
    worksheet = workbook.create_sheet(data_type_str)

    columns = None
    try:
        with open(input_file_name, 'r') as file_ptr:
            for raw_line in file_ptr:
                if raw_line.strip() == '':
                    continue

                if columns is None:
                    # First non-blank line only defines the column layout.
                    columns = find_columns(raw_line)
                    continue

                line_len = len(raw_line)
                # Cut the line terminator explicitly so the open-ended last
                # column keeps its final character on an unterminated line.
                if raw_line.endswith('\n'):
                    content = raw_line[:-1]
                else:
                    content = raw_line

                xl_data_row = []
                for field_index, (start, end) in enumerate(columns):
                    if line_len < start:
                        continue
                    xl_data_row.append(
                        convert_cell(field_index,
                                     content[start:end].strip()))
                worksheet.append(xl_data_row)

        workbook.save(output_file_name)
    except Exception:
        self.log.error('Exception during file parsing -> %s'
                       % traceback.format_exc())
    else:
        # Only report success when the workbook was really written.
        self.log.info("Output saved in file '%s'" % output_file_name)

    self.log.raw_line(':::::::::: Done ::::::::::')
def process(self, input_file_name):
    """Mail the flagged rows of the 'PR' sheet of a workbook, per supplier.

    Steps, as implemented below:

    1. The base name of ``input_file_name`` is split on '_'; the first part
       selects the company entry in ``data/company.yaml`` (which must have a
       non-empty "mail"), the first two parts prefix every mail subject.
    2. The 'PR' sheet is scanned from the top until a row whose column D
       reads 'as per last Down'; five consecutive blank rows end the scan
       and the sheet is skipped.
    3. Following rows with 'ITC-NA' in column O or 'Available in GSTR-2
       Only' in column Q are handed to ``self.insert_data``; the row after
       a row with 'from GST Portal' in column G supplies the report date.
    4. For each collected group one HTML mail per supplier is sent to the
       company's mail id; the SMTP session is re-opened after every 20 mails.

    :param input_file_name: path of the workbook to process.
    :return: None.
    """

    def read_row(sheet, row_idx, end_col):
        """Return the cell values of row_idx from column A up to end_col
        (exclusive)."""
        return [sheet[chr(col) + str(row_idx)].value
                for col in range(ord('A'), ord(end_col))]

    def open_session(message):
        """Log message, then open and log in to a STARTTLS SMTP session."""
        self.log.info(message)
        session = smtplib.SMTP(Smtp.server, Smtp.port)
        try:
            session.ehlo()
            session.starttls()
            session.ehlo()
            session.login(Smtp.user_name, Smtp.password)
        except Exception:
            # Do not leak the socket when the handshake or login fails.
            session.close()
            raise
        self.log.info("Login successful")
        return session

    if not is_file_exists(input_file_name, logger=self.log):
        return

    self.log.info("Processing file %s" % input_file_name)
    input_workbook = openpyxl.load_workbook(input_file_name)
    file_name = os.path.basename(input_file_name).split('.')[0].split('_')
    if len(file_name) < 2:
        self.log.error(
            "Exiting: File name '%s' must contain at least two '_' "
            "separated parts" % os.path.basename(input_file_name))
        return
    comp_initial = file_name[0]
    company_file_data = "%s %s - " % (comp_initial, file_name[1])

    # NOTE(review): FullLoader is fine for a trusted local file; switch to
    # yaml.safe_load if this file can ever come from an untrusted source.
    with open("data/company.yaml", "r") as fp:
        company = yaml.load(fp, Loader=yaml.FullLoader)["company"]
    if comp_initial not in company or company[comp_initial]["mail"] == "":
        self.log.error("Exiting: Mail id not defined for '%s'" % comp_initial)
        return

    # Sheet names are unique, so at most one 'PR' sheet is ever processed.
    input_sheet_name = "PR"
    if input_sheet_name not in input_workbook.sheetnames:
        return
    input_sheet = input_workbook[input_sheet_name]

    excel_data_dict = {"itc_na": dict(), "gstr2_only": dict()}

    # Loop till finding first occurrence of 'as per last Down' string
    row_num = 0
    num_blank_rows = 0
    process_sheet = False
    while num_blank_rows < 5:
        row_num += 1
        data_row = read_row(input_sheet, row_num, 'Q')
        if is_row_blank(data_row):
            num_blank_rows += 1
            continue
        num_blank_rows = 0
        if str(data_row[3]) == 'as per last Down':  # column D
            process_sheet = True
            break

    if not process_sheet:
        self.log.warning("Skipping sheet '%s'" % input_sheet_name)
        return

    self.log.info("Processing sheet '%s'" % input_sheet_name)
    num_blank_rows = 0
    report_date = "NA.NA.NA.NA"
    gst_portal_line_found = False
    while num_blank_rows < 5:
        row_num += 1
        data_row = read_row(input_sheet, row_num, 'R')
        if is_row_blank(data_row):
            num_blank_rows += 1
            continue

        # Reset num_blank_rows to continue reading excel sheet
        num_blank_rows = 0
        row_len = len(data_row)
        if row_len > 14 and data_row[14] == 'ITC-NA':
            self.insert_data("itc_na", data_row, excel_data_dict)
        elif row_len > 16 and data_row[16] == 'Available in GSTR-2 Only':
            # Index 16 needs at least 17 entries, hence '> 16'.
            self.insert_data("gstr2_only", data_row, excel_data_dict)

        # Fetching date for GST_PORTAL data: it sits in column D of the row
        # right after the 'from GST Portal' marker row.
        if gst_portal_line_found:
            report_date = data_row[3]
        gst_portal_line_found = data_row[6] == "from GST Portal"

    # Drop the first '.'-separated part and join the rest with '/'. str()
    # guards against a non-text cell value (e.g. a date object).
    report_date = "/".join(str(report_date).split(".")[1:])

    recipient = company[comp_initial]["mail"]
    comp_name = company[comp_initial]["name"] \
        + " GST # %s" % company[comp_initial]["gst"]

    mail_count = 0
    for r_type, excel_data in excel_data_dict.items():
        m_data = mail_info[r_type]
        curr_html_header = mail_info["html_header"] \
            % (m_data["heading"] % comp_name, m_data["message"])
        html_footer_comp_name = comp_name if r_type == "itc_na" else ""
        curr_html_footer = mail_info["html_footer"] \
            % (html_footer_comp_name, report_date)

        total = 0
        # Only connect when there is something to send for this group.
        smtp_session = \
            open_session("Connecting to server...") if excel_data else None
        try:
            for supp_name, supp_data in excel_data.items():
                rows = supp_data["rows"]
                total += len(rows)
                self.log.info("Sending mail for %s..." % supp_name)

                msg = MIMEMultipart('alternative')
                msg['From'] = Smtp.user_name
                msg['To'] = recipient
                msg['Subject'] = "%s %s::%s - %s" \
                    % (company_file_data, supp_name, supp_data["tin"],
                       m_data["subject"])

                total_igst = 0
                total_cgst = 0
                total_sgst = 0
                table_rows = []
                for row in rows:
                    # Unwrap the =CONCATENATE("...") formula around the
                    # first field.
                    row[0] = row[0].replace('=CONCATENATE("', '')
                    row[0] = row[0].rstrip('")')
                    table_rows.append(
                        mail_info["table_tr_format"] % tuple(row))
                    total_igst += float(row[4])
                    total_cgst += float(row[5])
                    total_sgst += float(row[6])

                # A totals line only makes sense for more than one row.
                if len(rows) > 1:
                    table_rows.append(
                        mail_info["table_total_tr_format"]
                        % (total_igst, total_cgst, total_sgst))

                # attach() is the documented way to add a part to a
                # multipart container.
                msg.attach(MIMEText(
                    curr_html_header + "".join(table_rows)
                    + curr_html_footer, 'html'))
                smtp_session.sendmail(Smtp.user_name, recipient,
                                      msg.as_string())
                mail_count += 1

                # Reconnect server for every 20 mails to avoid SPAM warning
                if (mail_count % 20) == 0:
                    smtp_session.quit()
                    smtp_session = None
                    smtp_session = open_session("Re-connecting to server...")
        finally:
            # Exit smtp server, also when sending failed half-way.
            if smtp_session is not None:
                smtp_session.quit()

        self.log.info("Total records for '%s': %s" % (r_type, total))

    self.log.info("Total emails send: %s" % mail_count)
def convert(self, input_file_name):
    """Extract the known sheets of an input workbook into '<name>_parsed.xlsx'.

    For every key of ``Gstr2bHeaders.input_header`` whose sheet is present in
    the input workbook ('CDNR' / 'CDNRA' are looked up as 'B2B-CDNR' /
    'B2B-CDNRA'), the rows returned by ``extract_data_from_excel_sheet`` are
    written to a freshly created output sheet of the same name under that
    sheet's output header. Rows of every sheet other than 'B2B' are also
    rearranged by ``shuffle_row_for_b2b_headers`` and appended to the output
    'B2B' sheet.

    The output workbook lives next to the input file; if it already exists
    it is loaded and sheets about to be written are recreated.

    :param input_file_name: path of the workbook to parse.
    :return: None. Any failure is logged, not raised.
    """
    import os  # local import so this block does not rely on file-level imports

    self.log.info("Processing file '%s'" % input_file_name)
    file_name = os.path.basename(input_file_name).split('.')[0]
    # os.path keeps a bare file name relative instead of producing '/<name>'.
    output_file_name = os.path.join(os.path.dirname(input_file_name),
                                    '%s_parsed.xlsx' % file_name)

    # First '_' separated part of the name, with its first two characters
    # moved to the end (presumably MMYYYY -> YYYYMM; confirm with callers).
    period_part = file_name.split('_')[0]
    gst_2yrm_val = period_part[2:] + period_part[0:2]
    xl_class = Gstr2bHeaders

    try:
        # Load input / output workbooks
        input_workbook = openpyxl.load_workbook(input_file_name)
        if not is_file_exists(output_file_name):
            output_workbook = openpyxl.Workbook()
            output_workbook.guess_types = True
        else:
            # Re-open the existing OUTPUT workbook (not the input again).
            output_workbook = openpyxl.load_workbook(output_file_name)
        input_workbook_sheets = input_workbook.sheetnames
        output_workbook_sheets = output_workbook.sheetnames

        for class_ref_name in xl_class.input_header.keys():
            input_sheet_name = class_ref_name
            if class_ref_name in ["CDNR", "CDNRA"]:
                input_sheet_name = "B2B-" + class_ref_name
            if input_sheet_name not in input_workbook_sheets:
                continue

            self.log.info('Processing sheet %s' % input_sheet_name)
            if input_sheet_name in output_workbook_sheets:
                self.log.warning('Sheet %s already exists! Will recreate..'
                                 % input_sheet_name)
                output_workbook.remove(output_workbook[input_sheet_name])

            input_sheet = input_workbook[input_sheet_name]

            # Start of data extraction logic
            rows_to_append = self.extract_data_from_excel_sheet(
                class_ref_name, input_sheet,
                xl_class.start_row[input_sheet_name], xl_class)

            # Create and append data to output sheet
            output_sheet = output_workbook.create_sheet(input_sheet_name)
            output_sheet.append(xl_class.output_header[class_ref_name])
            for row in rows_to_append:
                output_sheet.append(row)

            # Logic to append data to B2B sheet
            if input_sheet_name != "B2B":
                rows_to_append = self.shuffle_row_for_b2b_headers(
                    class_ref_name, rows_to_append, gst_2yrm_val, xl_class)
                # NOTE(review): raises KeyError (logged below) when the
                # output workbook has no 'B2B' sheet at this point.
                output_sheet = output_workbook["B2B"]
                for row in rows_to_append:
                    output_sheet.append(row)

        output_workbook.save(output_file_name)
        self.log.info("Output saved in file '%s'" % output_file_name)
    except Exception:
        self.log.error('Exception during file parsing -> %s'
                       % (traceback.format_exc()))

    self.log.raw_line(':::::::::: Done ::::::::::')