def parse_statement(self):
    """Build the statement header (account, balances, dates) from the sheet.

    The first two rows returned by ``self.sheet_rows()`` are not treated as
    data rows. Every later row must unpack into exactly five cells; the
    first is parsed as a date and the last as a balance.
    """
    stmt = Statement()
    stmt.account_id = self.account_id
    stmt.bank_id = self.bank_id
    stmt.currency = self.currency

    all_rows = self.sheet_rows()
    for row_date, _, _, _, row_balance in all_rows[2:]:
        # The first data row provides the start date and start balance.
        if stmt.start_date is None or stmt.start_balance is None:
            stmt.start_date = self.parse_datetime(row_date)
            stmt.start_balance = self.parse_float(row_balance)

        # Overwritten on every row, so the last row provides the end values.
        stmt.end_balance = self.parse_float(row_balance)
        stmt.end_date = self.parse_datetime(row_date)

    # The first cell carries the statement date as a suffix,
    # e.g. 'Kontoutdrag - 2018-01-04'; when it matches, it overrides the
    # end date taken from the last row.
    title_row = all_rows[0]
    found = re.match('Kontoutdrag *- *([0-9]{4}-[0-9]{2}-[0-9]{2})$', title_row[0])
    if found:
        stmt.end_date = self.parse_datetime(found.group(1))

    return stmt
def test_ofxWriter(self) -> None:
    """Write a statement with three investment lines and compare the
    pretty-printed XML with SIMPLE_OFX."""
    # Create sample statement:
    stmt = Statement("BID", "ACCID", "LTL")
    stmt.broker_id = "BROKERID"
    stmt.end_date = datetime(2021, 5, 1)

    buy = InvestStatementLine(
        "3",
        datetime(2021, 1, 1),
        "Sample 3",
        "BUYSTOCK",
        "BUY",
        "AAPL",
        Decimal("-416.08"),
    )
    buy.units = Decimal("3")
    buy.unit_price = Decimal("138.28")
    buy.fees = Decimal("1.24")
    buy.assert_valid()
    stmt.invest_lines.append(buy)

    sell = InvestStatementLine(
        "4",
        datetime(2021, 1, 1),
        "Sample 4",
        "SELLSTOCK",
        "SELL",
        "MSFT",
        Decimal("1127.87"),
    )
    sell.units = Decimal("-5")
    sell.unit_price = Decimal("225.63")
    sell.fees = Decimal("0.28")
    sell.assert_valid()
    stmt.invest_lines.append(sell)

    dividend = InvestStatementLine(
        "5",
        datetime(2021, 1, 1),
        "Sample 5",
        "INCOME",
        "DIV",
        "MSFT",
        Decimal("0.79"),
    )
    dividend.fees = Decimal("0.5")
    dividend.assert_valid()
    stmt.invest_lines.append(dividend)

    # Create writer:
    ofx_writer = ofx.OfxWriter(stmt)

    # Set the generation time so it is always predictable
    ofx_writer.genTime = datetime(2021, 5, 1, 0, 0, 0)

    assert prettyPrint(ofx_writer.toxml()) == SIMPLE_OFX
def _find_or_create_statement(self, accountid, currency):
    """Return the statement for (accountid, currency), creating it on first use.

    The first entry of ``self.statements`` whose ``account_id`` and
    ``currency`` both match is returned. Otherwise a new ``Statement`` is
    filled in, appended to ``self.statements`` and returned.
    """
    for existing in self.statements:
        if existing.account_id == accountid and existing.currency == currency:
            return existing

    created = Statement()
    created.bank_id = self.bank_id
    created.account_id = accountid
    #created.account_type = "CREDITLINE"
    created.currency = currency
    self.statements.append(created)
    return created
def _parse_statement(self, stmt):
    """Convert one statement XML element into a ``Statement``.

    Args:
        stmt: XML element that is searched (using ``self.xmlns``) for the
            account servicer, account id, account currency and balances.

    Returns:
        A ``Statement`` with bank id, account id, currency, opening
        ('OPBD') and closing ('CLBD') balance/date filled in, after
        ``self._parse_lines`` has been run on it.

    Raises:
        exceptions.ParseError: when no currency is known, when no usable
            balance in the statement currency exists, when the opening or
            closing balance is missing, or when the statement carries
            neither an IBAN nor another account identifier.
    """
    statement = Statement()
    statement.currency = self.currency

    # Prefer the BIC; fall back to the institution name.
    bnk = stmt.find('./s:Acct/s:Svcr/s:FinInstnId/s:BIC', self.xmlns)
    if bnk is None:
        bnk = stmt.find('./s:Acct/s:Svcr/s:FinInstnId/s:Nm', self.xmlns)
    iban = stmt.find('./s:Acct/s:Id/s:IBAN', self.xmlns)
    other = stmt.find('./s:Acct/s:Id/s:Othr/s:Id', self.xmlns)
    ccy = stmt.find('./s:Acct/s:Ccy', self.xmlns)
    bals = stmt.findall('./s:Bal', self.xmlns)

    acct_currency = ccy.text if ccy is not None else None
    if acct_currency:
        statement.currency = acct_currency
    elif statement.currency is None:
        raise exceptions.ParseError(
            0, "No account currency provided in statement. Please "
            "specify one in configuration file (e.g. currency=EUR)")

    bal_amts = {}
    bal_dates = {}
    for bal in bals:
        cd = bal.find('./s:Tp/s:CdOrPrtry/s:Cd', self.xmlns)
        amt = bal.find('./s:Amt', self.xmlns)
        dt = bal.find('./s:Dt', self.xmlns)

        # A balance without a type code or an amount cannot be stored
        # under a code below; skip it instead of failing on None.
        if cd is None or amt is None:
            continue

        # Amount currency should match with statement currency
        if amt.get('Ccy') != statement.currency:
            continue

        bal_amts[cd.text] = self._parse_amount(amt)
        bal_dates[cd.text] = self._parse_date(dt)

    if not bal_amts:
        raise exceptions.ParseError(
            0, "No statement balance found for currency '%s'. Check "
            "currency of statement file." % statement.currency)

    # Both balances are read unconditionally below; report a missing one
    # as a parse problem instead of a bare KeyError.
    for code in ('OPBD', 'CLBD'):
        if code not in bal_amts:
            raise exceptions.ParseError(
                0, "No '%s' balance found for currency '%s'."
                % (code, statement.currency))

    if iban is not None:
        account_id = iban.text
    elif other is not None:
        account_id = other.text
    else:
        raise exceptions.ParseError(
            0, "No account identifier (IBAN or other id) found in statement.")

    statement.bank_id = bnk.text if bnk is not None else None
    statement.account_id = account_id
    statement.start_balance = bal_amts['OPBD']
    statement.start_date = bal_dates['OPBD']
    statement.end_balance = bal_amts['CLBD']
    statement.end_date = bal_dates['CLBD']

    self._parse_lines(stmt, statement)

    return statement
def __init__(self, filename, settings):
    """Store the input and settings and prepare a BRL statement.

    Args:
        filename: stored as ``self.fin``.
        settings: mapping-like object; its ``"account"`` entry is read with
            ``.get`` and may therefore be ``None``.
    """
    self.fin = filename
    self.settings = settings
    self.statement = Statement(bank_id='0212', currency='BRL')
    # The original assignment ended with a stray comma, which stored a
    # one-element tuple instead of the configured account value.
    # NOTE(review): the attribute is named acct_id here; confirm whether
    # account_id was intended.
    self.statement.acct_id = self.settings.get("account")
    self.statement.branch_id = '1'
def parse(self):
    """Read the CSV file and return a statement holding its lines.

    The first CSV row is used as the list of field names; the remaining
    rows are merged by ``self.merge_lines_on_field`` and converted by
    ``self.to_statement_line``. The account id comes from
    ``self.settings['iban']``.

    Returns:
        A ``Statement`` whose ``lines`` is the converted record list.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation asks for file objects passed to csv.reader.
    with open(self.filename, "r", newline="") as f:
        self.input = f
        lines = list(csv.reader(f))
        field_names = lines[0]
        date_index = self.get_index_from_first_line(field_names)
        # Only the merged records are used; the first element is ignored.
        _, raw_records = self.merge_lines_on_field(
            date_index, field_names, lines[1:])
        lines = self.to_statement_line(raw_records)
        statement = Statement()
        statement.account_id = self.settings['iban']
        statement.lines = lines
        return statement
def __init__(self, file_name, file_encoding, account_id):
    """Remember the input file details and prepare an EUR statement."""
    self.file_name = file_name
    self.file_encoding = file_encoding

    # BIC Sparkasse Freiburg
    bic = 'FRSPDE66XXX'
    self.statement = Statement(
        bank_id=bic,
        account_id=account_id,
        currency='EUR',
    )
def parse_statement(self):
    """Build the statement header from the first three rows of the sheet.

    The three rows are unpacked as header, account and footer rows. The
    account row must hold exactly six cells; its first two are used as
    the account id and the end balance. The first footer cell is matched
    against each pattern in ``self.footer_regexps`` to obtain the start
    and end dates; a later matching pattern overrides an earlier one.
    """
    stmt = Statement()
    sheet = self.workbook.active

    # We need only first 3 rows here.
    head = [[cell.value for cell in row] for row in take(3, sheet.iter_rows())]
    assert len(head) == 3

    header_row, account_row, footer_row = head
    account_id, saldo, _available, _credit, _spare_a, _spare_b = account_row

    stmt.account_id = account_id
    stmt.end_balance = atof(saldo, self.locale)
    stmt.bank_id = self.bank_id
    stmt.currency = self.currency_id

    for pattern in self.footer_regexps:
        found = re.match(pattern, footer_row[0])
        if found and found.groups():
            period_start, period_end = found.groups()
            stmt.start_date = self.parse_datetime(period_start)
            stmt.end_date = self.parse_datetime(period_end)

    return stmt
def parse(self):
    """Parse the cp1250-encoded, semicolon-delimited CSV file.

    ``self.parse_intro`` consumes the leading part of the reader and
    returns the header, which ``self.parse_transactions`` then uses for
    the remaining rows. Both fill in the same ``Statement``.

    Returns:
        The populated ``Statement`` (bank id 'KOMBCZPP', currency 'CZK').
    """
    statement = Statement(bank_id='KOMBCZPP', currency='CZK')
    # newline='' leaves newline handling to the csv module, as its
    # documentation asks for file objects passed to csv.reader.
    with open(self.filename, encoding='cp1250', newline='') as f:
        reader = csv.reader(f, delimiter=';')
        header = self.parse_intro(reader, statement)
        self.parse_transactions(reader, header, statement)
    return statement
def __init__(self, filename):
    """Store the input and fill the statement header from ``_get_*`` helpers."""
    self.fin = filename
    self.statement = Statement()

    # Account identification.
    self.statement.account_id = self._get_account_id()
    self.statement.currency = self._get_currency()

    # Opening values.
    self.statement.start_balance = self._get_start_balance()
    self.statement.start_date = self._get_start_date()

    # Closing values.
    self.statement.end_balance = self._get_end_balance()
    self.statement.end_date = self._get_end_date()

    logging.debug(self.statement)
def parse(self):
    """Main entry point for parsers

    Parses ``self.filename`` as XML, fills a fresh statement from it and
    returns ``self.statement``.
    """
    self.statement = Statement()

    document = ET.parse(self.filename)
    self._parse_statement_properties(document)
    self._parse_lines(document)

    return self.statement
def parser_old(filename):
    """Parse an iso-8859-8 HTML export into a list of transaction dicts.

    Each dict is keyed by the DATA_* constants. A table row without any
    ``headers`` cells is a continuation row: its text becomes the memo of
    the previously collected transaction.

    NOTE(review): ``self`` and the DATA_* keys are not defined in this
    block; they must come from an enclosing scope.
    """

    def parse_tr(tr):
        """Return (is_continuation, date_or_comment, desc, paym, depo, bal)."""

        def get_header(tr, n):
            return str(tr.find_all(headers='header' + str(n))[0].string)

        header_cells = tr.find_all(headers=True)
        assert len(header_cells) in [0, 7]

        if len(header_cells) == 0:
            comment = str(tr.find_all(colspan='5')[0].string)
            return True, comment, None, None, None, None

        when = dateutil.parser.parse(get_header(tr, 1), dayfirst=True)
        desc = get_header(tr, 2)
        paym = self.get_float(get_header(tr, 5))
        depo = self.get_float(get_header(tr, 6))
        assert (depo is None) != (paym is None), (
            'Either depo or paym need to exist, but not both: ' + str(tr))
        bal = self.get_float(get_header(tr, 7))
        return False, when, desc, paym, depo, bal

    with open(filename, 'r', encoding='iso-8859-8') as f:
        soup = BeautifulSoup(f, 'lxml')
        # NOTE(review): this Statement is created but never used here.
        statement = Statement(currency='ILS')
        records = []
        body_table = soup.find_all('table', id='mytable_body')[0]
        for tr in body_table.find_all(id='TR_ROW_BANKTABLE'):
            is_continuation, date_or_comm, desc, paym, depo, bal = parse_tr(tr)
            if is_continuation:
                records[-1][DATA_MEMO] = date_or_comm
                continue
            records.append({
                DATA_DATE: date_or_comm,
                DATA_DSC: desc,
                # Payments are stored as negative amounts.
                DATA_AMT: -paym if paym is not None else depo,
                DATA_BAL: bal,
                DATA_MEMO: None,
            })
        return records
def parse(self):
    """Open the HTML export and check that the expected table is present.

    The file is read as iso-8859-8 and searched for a table with id
    'trBlueOnWhite12'. No transaction rows are extracted here; the
    returned ``Statement`` is empty.

    Raises:
        ParseError: when no such table exists in the document.
    """
    with open(self.filename, "r", encoding='iso-8859-8') as f:
        soup = BeautifulSoup(f, 'lxml')
        statement = Statement()
        table = soup.find_all('table', id='trBlueOnWhite12')
        if not table:
            # The message now spells the id exactly as it is searched for.
            raise ParseError(0, "'trBlueOnWhite12' table not found")
        # A leftover q.d() interactive-debugger call was removed here.
        return statement
def parse(self) -> Statement:
    """Main entry point for parsers"""
    self.statement = Statement()
    self.statement.currency = self.currency

    document = ET.parse(self.filename)

    # Find out XML namespace and make sure we can parse it
    namespace = self._get_namespace(document.getroot())
    self.version = self._recognize_version(namespace)
    self.xmlns = {"s": namespace}

    self._parse_statement_properties(document)
    self._parse_lines(document)

    return self.statement
def parse(self):
    """Read the CSV export and return a USD statement with one line per row.

    Columns read from each row: 'Transaction Number', 'Date',
    'Statement Description', 'Debit', 'Credit', 'Description' and
    'Check Number'. The amount is taken from 'Debit' when that cell is
    non-empty, otherwise from 'Credit'.
    """
    statement = Statement(bank_id='321081669', currency='USD')
    # newline='' leaves newline handling to the csv module, as its
    # documentation asks for file objects passed to csv readers.
    with open(self.filename, newline='') as f:
        for row in csv.DictReader(f):
            line = StatementLine(
                id=row['Transaction Number'],
                date=self.parse_datetime(row['Date']),
                memo=row['Statement Description'],
                amount=Decimal(row['Debit'] or row['Credit']),
            )
            line.payee = row['Description']
            line.check_no = row['Check Number']
            line.trntype = self.guess_type(line.payee, line.amount)
            statement.lines.append(line)
    return statement
def test_ofxWriter(self):
    """Write a two-line statement and compare the XML with SIMPLE_OFX."""
    # Create sample statement:
    stmt = Statement("BID", "ACCID", "LTL")

    first = StatementLine("1", datetime(2012, 2, 12), "Sample 1", 15.4)
    stmt.lines.append(first)

    second = StatementLine("2", datetime(2012, 2, 12), "Sample 2", 25.0)
    second.payee = ''
    second.bank_account_to = BankAccount("SNORAS", "LT1232")
    second.bank_account_to.branch_id = "VNO"
    stmt.lines.append(second)

    # Create writer:
    ofx_writer = ofx.OfxWriter(stmt)

    # Set the generation time so it is always predictable
    ofx_writer.genTime = datetime(2012, 3, 3, 0, 0, 0)

    assert prettyPrint(ofx_writer.toxml()) == SIMPLE_OFX
def __init__(self, fin, account_id, currency, encoding=None, locale=None,
             analyze=False):
    """Store the options, read all lines of ``fin``, validate and create the statement.

    ``self.bank_id`` is read when the statement is created; it is not set
    in this method.
    """
    self.account_id = account_id
    self.currency = currency
    self.locale = locale
    self.encoding = encoding
    self.analyze = analyze

    with open(fin, 'r', encoding=self.encoding) as source:
        self.lines = source.readlines()

    self.validate()

    self.statement = Statement(
        bank_id=self.bank_id,
        account_id=self.account_id,
        currency=self.currency,
    )
class LansforsakringarParser(StatementParser):
    """Statement parser reading transactions from the first sheet of an
    xlrd workbook; the statement currency is SEK."""

    def __init__(self, filename, bank_id, account_id):
        """Store the file name and create this parser's own statement.

        Args:
            filename: path handed to ``xlrd.open_workbook`` in ``parse``.
            bank_id: stored on the statement.
            account_id: stored on the statement.
        """
        self.filename = filename
        # Created per instance: a Statement defined at class level would be
        # one object shared, and mutated, by every parser instance.
        self.statement = Statement(currency='SEK')
        self.statement.bank_id = bank_id
        self.statement.account_id = account_id
        self.sheet = None
        self.row_num = 0

    def parse(self):
        """Open the workbook, select its first sheet and run the base parse."""
        with xlrd.open_workbook(self.filename) as book:
            self.sheet = book.sheet_by_index(0)
            return super().parse()

    def split_records(self):
        """Return the sheet's row iterator positioned after the first two rows."""
        rows = self.sheet.get_rows()
        next(rows)  # statement date
        next(rows)  # headers
        return rows

    def parse_record(self, row):
        """Convert one sheet row into a ``StatementLine``.

        Cells 0-3 are read as date, user date, memo and amount; the running
        row number is used as the reference number.
        """
        self.row_num += 1
        line = StatementLine()
        line.date = self.parse_datetime(row[0].value)
        line.date_user = self.parse_datetime(row[1].value)
        line.refnum = str(self.row_num)
        line.memo = row[2].value
        line.amount = row[3].value
        line.trntype = self.get_type(line)
        line.id = generate_transaction_id(line)
        return line

    @staticmethod
    def get_type(line):
        """Map the sign of ``line.amount`` to 'CREDIT', 'DEBIT' or 'OTHER'."""
        if line.amount > 0:
            return 'CREDIT'
        elif line.amount < 0:
            return 'DEBIT'
        else:
            return 'OTHER'
def parse(self):
    """Main entry point for parsers

    Parses ``self.filename`` as XML, checks that the root namespace
    starts with ISO20022_NAMESPACE_ROOT, then fills and returns
    ``self.statement``.

    Raises:
        ParseError: when the document namespace is not recognized.
    """
    self.statement = Statement()
    self.statement.currency = self.currency
    tree = ET.parse(self.filename)

    # Find out XML namespace and make sure we can parse it
    ns = self._get_namespace(tree.getroot())
    if not ns.startswith(ISO20022_NAMESPACE_ROOT):
        # NOTE(review): assumes this is ofxstatement's ParseError, whose
        # constructor takes (lineno, message); the previous one-argument
        # call would then fail with TypeError before reporting the problem.
        raise ParseError(0, "Cannot recognize ISO20022 XML")

    self.xmlns = {
        "s": ns
    }

    self._parse_statement_properties(tree)
    self._parse_lines(tree)

    return self.statement
def test_ofxWriter(self) -> None:
    """Write a two-line statement (the second line carries currency
    details) and compare the XML with SIMPLE_OFX."""
    # Create sample statement:
    stmt = Statement("BID", "ACCID", "LTL")

    first = StatementLine(
        "1", datetime(2012, 2, 12), "Sample 1", Decimal("15.4"))
    stmt.lines.append(first)

    second = StatementLine(
        "2", datetime(2012, 2, 12), "Sample 2", Decimal("25.0"))
    second.payee = ""
    second.bank_account_to = BankAccount("SNORAS", "LT1232")
    second.bank_account_to.branch_id = "VNO"
    second.currency = Currency("USD")
    second.orig_currency = Currency("EUR", Decimal("3.4543"))
    stmt.lines.append(second)

    # Create writer:
    ofx_writer = ofx.OfxWriter(stmt)

    # Set the generation time so it is always predictable
    ofx_writer.genTime = datetime(2012, 3, 3, 0, 0, 0)

    assert prettyPrint(ofx_writer.toxml()) == SIMPLE_OFX
def parse(self):
    """Parse ``self.filename`` with the sub-parser chosen by
    ``self.detect_version()`` and return an ILS statement.

    Each sub-parser returns a list of dicts keyed by the DATA_* names
    below; those dicts are then converted into ``StatementLine`` objects.
    """
    DATA_DATE = 'data_date'
    DATA_AMT = 'data_amt'
    DATA_DSC = 'data_dsc'
    DATA_MEMO = 'data_mem'
    DATA_BAL = 'data_bal'

    def parser_old(filename):
        """HTML export whose rows carry ``headers`` attributes."""

        def parse_tr(tr):
            """Return (is_continuation, date_or_comment, desc, paym, depo, bal)."""

            def get_header(tr, n):
                return str(tr.find_all(headers='header' + str(n))[0].string)

            header_cells = tr.find_all(headers=True)
            assert len(header_cells) in [0, 7]

            if len(header_cells) == 0:
                # A row without header cells continues the previous one.
                comment = str(tr.find_all(colspan='5')[0].string)
                return True, comment, None, None, None, None

            when = dateutil.parser.parse(get_header(tr, 1), dayfirst=True)
            desc = get_header(tr, 2)
            paym = self.get_float(get_header(tr, 5))
            depo = self.get_float(get_header(tr, 6))
            assert (depo is None) != (paym is None), (
                'Either depo or paym need to exist, but not both: ' + str(tr))
            bal = self.get_float(get_header(tr, 7))
            return False, when, desc, paym, depo, bal

        with open(filename, 'r', encoding='iso-8859-8') as f:
            soup = BeautifulSoup(f, 'lxml')
            # NOTE(review): this Statement is created but never used here.
            statement = Statement(currency='ILS')
            records = []
            body_table = soup.find_all('table', id='mytable_body')[0]
            for tr in body_table.find_all(id='TR_ROW_BANKTABLE'):
                is_continuation, date_or_comm, desc, paym, depo, bal = parse_tr(tr)
                if is_continuation:
                    records[-1][DATA_MEMO] = date_or_comm
                    continue
                records.append({
                    DATA_DATE: date_or_comm,
                    DATA_DSC: desc,
                    DATA_AMT: -paym if paym is not None else depo,
                    DATA_BAL: bal,
                    DATA_MEMO: None,
                })
            return records

    def parser_new(filename):
        """HTML export with plain seven-cell table rows."""
        with open(filename, 'r', encoding='iso-8859-8') as f:
            bs = BeautifulSoup(f, 'lxml')
            # NOTE(review): the keyword is spelled i__d, not id, so it
            # filters on an attribute named "i__d" -- confirm this is intended.
            table = bs.find_all('table', i__d='trBlueOnWhite12')[0]
            table_rows = table.find_all('tr', recursive=False)
            records = []
            # The first row is skipped; rows without exactly 7 cells too.
            for tr in table_rows[1:]:
                tds = tr.find_all('td')
                if len(tds) != 7:
                    continue
                when = dateutil.parser.parse(str(tds[0].string), dayfirst=True)
                desc = str(tds[1].string)
                paym = self.get_float(str(tds[4].string))
                depo = self.get_float(str(tds[5].string))
                assert (depo is None) != (paym is None), (
                    'Either depo or paym need to exist, but not both: ' + str(tds))
                amount = -paym if paym is not None else depo
                bal = self.get_float(str(tds[6].string))
                records.append({
                    DATA_DATE: when,
                    DATA_DSC: desc,
                    DATA_AMT: amount,
                    DATA_BAL: bal,
                    DATA_MEMO: None,
                })
            return records

    def parser_xslx(filename):
        """Workbook export: first worksheet, six leading rows skipped."""
        with open(filename, 'rb') as f:
            ws = openpyxl.load_workbook(f).worksheets[0]
            row_iter = iter(ws.rows)
            for _ in range(6):
                next(row_iter)
            records = []
            for row in row_iter:
                when = dateutil.parser.parse(str(row[0].value), dayfirst=False)
                desc = str(row[1].value)
                paym = self.get_float(str(row[3].value))
                depo = self.get_float(str(row[4].value))
                assert (depo is None) != (paym is None), (
                    'Either depo or paym need to exist, but not both: ')
                amount = -paym if paym is not None else depo
                bal = self.get_float(str(row[5].value))
                # Every non-empty cell from index 7 onwards goes into the memo.
                memo = " ".join(
                    [str(c.value) for c in row[7:] if c.value is not None])
                records.append({
                    DATA_DATE: when,
                    DATA_DSC: desc,
                    DATA_AMT: amount,
                    DATA_BAL: bal,
                    DATA_MEMO: memo,
                })
            return records

    # The detected version number indexes into this list.
    PARSERS = [parser_old, parser_new, parser_xslx]

    v = self.detect_version()
    assert v is not None, "Unsupported file %s" % self.filename
    self.log("Detected file of version %d" % v)

    data = PARSERS[v](self.filename)
    self.log('Found %d transactions' % len(data))

    stmnt = Statement(currency='ILS')
    for record in data:
        stmt_line = StatementLine(date=record[DATA_DATE],
                                  amount=record[DATA_AMT])
        #stmt_line.end_balance = record[DATA_BAL] #TODO: conf
        stmt_line.payee = record[DATA_DSC]
        if record[DATA_MEMO] is not None:
            stmt_line.memo = record[DATA_MEMO]

        # The transaction type is derived from the payee prefix, falling
        # back to the sign of the amount.
        if stmt_line.payee.startswith('משיכה'):
            stmt_line.trntype = 'ATM'
        elif stmt_line.payee.startswith('שיק'):
            stmt_line.trntype = 'CHECK'
        elif record[DATA_AMT] < 0:
            stmt_line.trntype = "CASH"
        else:
            stmt_line.trntype = "DEP"

        stmt_line.assert_valid()
        stmnt.lines.append(stmt_line)

    return stmnt
class LansforsakringarParser(StatementParser):
    """Statement parser reading a 'Kontoutdrag' sheet from an xlrd workbook;
    the statement currency is SEK."""

    def __init__(self, filename, bank_id, account_id):
        """Store the file name and create this parser's own statement.

        Args:
            filename: path handed to ``xlrd.open_workbook`` in ``parse``.
            bank_id: stored on the statement.
            account_id: stored on the statement.
        """
        self.filename = filename
        # Created per instance: a Statement defined at class level would be
        # shared by every parser instance, so a second parser would see the
        # first one's start balance and skip setting its own.
        self.statement = Statement(currency='SEK')
        self.statement.bank_id = bank_id
        self.statement.account_id = account_id
        self.sheet = None
        self.row_num = 0
        # Generated transaction id -> first line that produced it.
        self.seen = {}

    def parse(self):
        """Open the workbook, select its first sheet and run the base parse."""
        with xlrd.open_workbook(self.filename) as book:
            self.sheet = book.sheet_by_index(0)
            return super().parse()

    def split_records(self):
        """Read the statement date from the first row, skip the header row
        and return the iterator over the remaining rows."""
        rows = self.sheet.get_rows()
        datestr = next(rows)  # statement date
        assert datestr[0].value.startswith("Kontoutdrag -")
        # end of statement date's day; [13:] drops the "Kontoutdrag -" prefix
        self.statement.end_date = self.parse_datetime(
            datestr[0].value[13:]) + timedelta(days=1)
        next(rows)  # headers
        return rows

    def parse_record(self, row):
        """Convert one sheet row into a ``StatementLine``.

        Cells 0-3 are read as date, user date, memo and amount; cell 4 is
        the balance after the transaction. The first row also yields the
        statement's start balance and start date, and every row updates
        the end balance. A duplicate generated id is logged as a warning;
        the line is returned either way.
        """
        self.row_num += 1
        line = StatementLine()
        line.date = self.parse_datetime(row[0].value)
        line.date_user = self.parse_datetime(row[1].value)
        line.refnum = str(self.row_num)
        line.memo = row[2].value
        line.amount = row[3].value
        line.trntype = self.get_type(line)

        if self.statement.start_balance is None and self.row_num == 1:
            # Balance before the first transaction was applied.
            self.statement.start_balance = row[4].value - line.amount
            self.statement.start_date = line.date_user
        self.statement.end_balance = row[4].value

        line.id = self.generate_transaction_id(line)
        if line.id in self.seen:
            # NOTE(review): assumes `log` is a logging.Logger; warning() is
            # the supported spelling, warn() is deprecated.
            log.warning(
                "Transaction with duplicate FITID generated:\n%s\n%s\n\n",
                line, self.seen[line.id])
        else:
            self.seen[line.id] = line
        return line

    def generate_transaction_id(self, stmt_line):
        """Generate pseudo-unique id for given statement line.

        This function can be used in statement parsers when real transaction
        id is not available in source statement.

        Modified version of ofxstatement's function of the same name.
        Includes refnum (in our case, row number) into the hash; this is
        safe here as Kontoutdrag is only available after the reporting
        period is over i.e. it should never change.

        Returns:
            The SHA-1 hex digest over the line's date, refnum, memo and
            amount.
        """
        h = sha1()
        h.update(stmt_line.date.strftime("%Y-%m-%d %H:%M:%S").encode("utf8"))
        h.update(stmt_line.refnum.encode("utf8"))
        h.update(stmt_line.memo.encode("utf8"))
        h.update(str(stmt_line.amount).encode("utf8"))
        return h.hexdigest()

    @staticmethod
    def get_type(line):
        """Map the sign of ``line.amount`` to 'CREDIT', 'DEBIT' or 'OTHER'."""
        if line.amount > 0:
            return 'CREDIT'
        elif line.amount < 0:
            return 'DEBIT'
        else:
            return 'OTHER'
def __init__(self, filename):
    """Remember the input file name and prepare a statement whose third
    positional argument is 'HUF'."""
    self.filename = filename
    self.statement = Statement(None, None, 'HUF')
def __init__(self, filename):
    """Prepare the statement, store the input as ``self.fin`` and register
    the 'newbcsv' CSV dialect."""
    self.statement = Statement(
        'NEECBEB2',
        None,
        'EUR',
    )
    self.fin = filename

    dialect = NewBCsv()
    csv.register_dialect('newbcsv', dialect)
def __init__(self, file_obj):
    """Run the base initializer, read the whole of ``file_obj`` and start
    with an empty statement."""
    super(PostbankXMLParser, self).__init__()

    # The complete input is read up front and kept in a private attribute.
    self.__data = file_obj.read()

    self.statement = Statement()
def __init__(self, filename):
    """Prepare the statement, store the input as ``self.fin`` and register
    the 'belfiuscsv' CSV dialect."""
    self.statement = Statement(
        'GKCCBEBB',
        None,
        'EUR',
    )
    self.fin = filename

    dialect = BelfiusCsv()
    csv.register_dialect('belfiuscsv', dialect)
def __init__(self) -> None:
    """Start with an empty statement."""
    self.statement = Statement()
def __init__(self):
    """Start with an empty statement."""
    self.statement = Statement()
def __init__(self, fin, encoding):
    """Store the input and its encoding, and start with an empty statement."""
    self.fin = fin
    self.encoding = encoding
    self.statement = Statement()
def __init__(self, filename):
    """Store the input file name and start with an empty statement."""
    self.filename = filename
    self.statement = Statement()
def __init__(self, fin):
    """Store the input as ``self.fin`` and start with an empty statement."""
    self.fin = fin
    self.statement = Statement()