Пример #1
0
class OfcParser:
    """Minimal pyparsing grammar for reading OFC documents."""

    def __init__(self, debug=False):
        """Assemble the recursive tag grammar and keep it on ``self.parser``.

        If ``debug`` is truthy, the ofxtools debug actions are attached to
        the top-level parser.
        """
        node = Forward().setResultsName("OFC")
        open_tag, close_tag = self._tag()
        leaf_tag = self._tag(closed=False)

        # A leaf is an opening tag followed by its value, which runs up to
        # the next '<' or line break.
        leaf = Group(leaf_tag + CharsNotIn("<\r\n"))

        # An aggregate wraps one or more nested aggregates or leaves between
        # an opening tag and a closing tag.
        children = Dict(OneOrMore(node | leaf))
        node << Group(open_tag + children + close_tag)

        self.parser = Group(node).setResultsName("document")
        if debug:
            self.parser.setDebugActions(
                ofxtools._ofxtoolsStartDebugAction,
                ofxtools._ofxtoolsSuccessDebugAction,
                ofxtools._ofxtoolsExceptionDebugAction,
            )

    def _tag(self, closed=True):
        """Return the tag expression(s) used by the grammar.

        With ``closed`` true the result is an ``(open, close)`` pair;
        otherwise only the opening-tag expression is returned.
        """
        # The angle brackets are suppressed so only the tag name is kept.
        opening = (
            Literal("<").suppress()
            + Word(alphanums + ".")
            + Literal(">").suppress()
        )
        if not closed:
            return opening

        # Closing tags (and any whitespace after them) are matched but
        # dropped from the results.
        closing = Group(
            "</" + Word(alphanums + ".") + ">" + ZeroOrMore(White())
        ).suppress()
        return opening, closing

    def parse(self, ofc):
        """Run the grammar over the string ``ofc`` and return the parse
        results converted to a dictionary."""
        results = self.parser.parseString(ofc)
        return results.asDict()
Пример #2
0
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    Besides the pyparsing grammar, the class carries a set of regex-based
    clean-up helpers that are applied to the raw text before parsing, and a
    second round of repairs (``fix_ofc``) that is tried when the first parse
    attempt raises ``ParseException``.
    """

    def __init__(self, debug=False):
        """Build the grammar and store it on ``self.parser``.

        Args:
            debug: when truthy, attach the ofxtools debug actions to the
                top-level parser.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # A content element is an opening tag plus its value, which runs up
        # to the next '<' or line break.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        # An aggregate is an open tag, one or more nested aggregates or
        # content elements, and a close tag.
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns an ``(open_tag, close_tag)`` pair when ``closed`` is true,
        otherwise only the opening-tag expression.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if closed:
            # The whole closing tag, plus trailing whitespace, is suppressed.
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document.

        The text is first normalised by the clean-up helpers. If parsing
        raises ``ParseException``, the text is passed through ``fix_ofc`` and
        parsed once more; a failure of that second attempt propagates.
        """
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)
        # XXX: needs a better solution. The recursion limit is a
        # process-wide setting, so only ever raise it; never lower a limit
        # the host application has already set higher.
        import sys
        if sys.getrecursionlimit() < 5000:
            sys.setrecursionlimit(5000)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """Fix an OFC by inserting a ``0`` right after a ``<LEDGER>`` tag
        that is followed only by non-digit characters up to a newline."""
        return re.compile(r'<LEDGER>(\D*\n)', re.UNICODE).sub(r'<LEDGER>0\1', ofc)

    def remove_inline_closing_tags(self, ofc):
        """Fix an OFC by removing a closing tag that follows other text on
        the same line (e.g. ``<NAME>Foo</NAME>`` becomes ``<NAME>Foo``)."""
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """Apply the heavier repairs used after a failed parse.

        Removes the unwanted tags, fills the empty FITID/CHECKNUM tags and
        injects the ACCTSTMT wrapper when it is missing. Always returns a
        string.
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        """Drop TRNRS open/close tags and CLTID tags together with the word
        characters that follow them."""
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        """Give FITID and CHECKNUM tags a dummy ``0`` value when the tag is
        not directly followed by a word character or ``+``.

        The single character after the tag is consumed by the match and
        replaced with ``0`` plus a newline.
        """
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        return re.sub(expression % 'CHECKNUM', replacement % 'CHECKNUM', ofc)

    def _translate_chknum_to_checknum(self, ofc):
        """
        Some banks put a CHKNUM instead of CHECKNUM. This method translates
        CHKNUM to CHECKNUM in order to parse this information correctly.
        """
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        """Wrap the document body in ACCTSTMT with a dummy ACCTFROM block.

        If ``<OFC>`` is already followed by ``<ACCTSTMT>`` the text is
        returned unchanged, so that callers always receive a string instead
        of ``None``.
        """
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        if re.search(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc
        return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')
Пример #3
0
class QifParser:
    """pyparsing grammar for QIF statements.

    The grammar is assembled in ``__init__`` and stored on ``self.parser``;
    ``parse`` applies it to a QIF string.
    """

    def __init__(self, debug=False):
        """Build the QIF grammar.

        Args:
            debug: when truthy, attach the debug actions to the top-level
                parser.
        """
        # Each table maps a one-character line prefix to the results name
        # under which the rest of that line is stored.
        account_items = {'N': "Name",
                         'T': "AccountType",
                         'D': "Description",
                         'L': "CreditLimit",
                         'X': "UnknownField",
                         'B': "Balance",
                         '/': "BalanceDate",
                         '$': "Balance"}

        noninvestment_items = {'D': "Date",
                               'T': "Amount",
                               'U': "Amount2",
                               'C': "Cleared",
                               'N': "Number",
                               'P': "Payee",
                               'M': "Memo",
                               'L': "Category",
                               'A': "Address",
                               'S': "SplitCategory",
                               'E': "SplitMemo",
                               '$': "SplitAmount",
                               '-': "NegativeSplitAmount"}

        investment_items = {'D': "Date",
                            'N': "Action",
                            'Y': "Security",
                            'I': "Price",
                            'Q': "Quantity",
                            'T': "Amount",
                            'C': "Cleared",
                            'P': "Text",
                            'M': "Memo",
                            'O': "Commission",
                            'L': "TransferAccount",
                            '$': "TransferAmount"}

        category_items = {'N': "Name",
                          'D': "Description",
                          'T': "TaxRelated",
                          'I': "IncomeCategory",
                          'E': "ExpenseCategory",
                          'B': "BudgetAmount",
                          'R': "TaxSchedule"}

        # NOTE(review): this table is never used; ``classlist`` below is
        # built from ``category_items``. Confirm which one is intended.
        class_items = {'N': "Name",
                       'D': "Description"}

        # '!Option:' lines are matched and dropped from the results.
        options = Group(CaselessLiteral('!Option:') + restOfLine).suppress()

        banktxns = Group(CaselessLiteral('!Type:Bank').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("BankTransactions")

        cashtxns = Group(CaselessLiteral('!Type:Cash').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("CashTransactions")

        # Both the '!Type:CCard' and '!Type!CCard' spellings are accepted.
        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        # NOTE(review): liabilities share the "CreditCardTransactions"
        # results name with ``ccardtxns``; kept as-is because consumers may
        # rely on it. Confirm whether that is deliberate.
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                              ZeroOrMore(Or([self._items(noninvestment_items),
                                             options]))
                              ).setResultsName("CreditCardTransactions")

        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")

        acctlist = Group(CaselessLiteral('!Account').suppress() +
                         ZeroOrMore(Or([self._items(account_items, name="AccountInfo")]))
                         ).setResultsName("AccountList")

        category = Group(CaselessLiteral('!Type:Cat').suppress() +
                         ZeroOrMore(self._items(category_items))
                         ).setResultsName("CategoryList")

        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(category_items))
                          ).setResultsName("ClassList")

        # Account lists, category/class lists and surrounding whitespace are
        # parsed but suppressed; only the transaction sections are kept.
        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns | liabilitytxns | invsttxns) +
                            ZeroOrMore(category | classlist).suppress() +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")

        if debug:
            self.parser.setDebugActions(_ofxtoolsStartDebugAction,
                                        _ofxtoolsSuccessDebugAction,
                                        _ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Build the expression for one record made of ``items`` fields.

        Args:
            items: mapping of line prefix to results name for each field.
            name: results name given to the whole record group.

        Returns:
            A ``Group`` matching one or more field lines followed by the
            record terminator ('^' or '^EUR', stored as 'Currency').
        """
        # The loop variable must not be called ``name``: that would shadow
        # the parameter and label the group with the last field's name
        # instead of the requested record name.
        item_list = [self._item(code, field_name)
                     for code, field_name in items.items()]
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Build the expression for one field line: the suppressed prefix
        ``code``, the rest of the line stored as ``name``, and the
        suppressed line end."""
        return CaselessLiteral(code).suppress() + \
               restOfLine.setResultsName(name) + \
               LineEnd().suppress()

    def parse(self, qif):
        """Parse the QIF string ``qif`` and return the pyparsing results."""
        return self.parser.parseString(qif)
Пример #4
0
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    The raw text is cleaned up with a few regex helpers before it is handed
    to the pyparsing grammar; if that parse raises ``ParseException`` the
    text goes through ``fix_ofc`` and is parsed a second time.
    """

    def __init__(self, debug=False):
        """Build the grammar and store it on ``self.parser``.

        Args:
            debug: when truthy, attach the ofxtools debug actions to the
                top-level parser.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Content: an opening tag and its value up to '<' or a line break.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        # Aggregate: open tag, nested aggregates/content, close tag.
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns ``(open_tag, close_tag)`` when ``closed`` is true and just
        the opening-tag expression otherwise.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if closed:
            # Closing tag and trailing whitespace are matched but dropped.
            closeTag = Group("</" + Word(alphanums + ".") + ">" +
                             ZeroOrMore(White())).suppress()
            return openTag, closeTag
        return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document.

        A ``ParseException`` from the first attempt triggers ``fix_ofc`` and
        one retry; an exception from the retry propagates to the caller.
        """
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)
        # XXX: needs a better solution. The limit is global to the
        # interpreter, so it is only raised, never lowered below a value the
        # host program may already have configured.
        import sys
        if sys.getrecursionlimit() < 5000:
            sys.setrecursionlimit(5000)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """
        Fix an OFC by adding a zero after a LEDGER tag that is followed only
        by non-digit characters up to a newline.
        """
        return re.compile(r'<LEDGER>(\D*\n)',
                          re.UNICODE).sub(r'<LEDGER>0\1', ofc)

    def remove_inline_closing_tags(self, ofc):
        """
        Fix an OFC by removing a closing 'tag' that follows other text on
        the same line.
        """
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """
        Repair a bad OFC: strip unwanted tags, fill empty FITID/CHECKNUM
        tags and inject the ACCTSTMT wrapper if it is missing. Always
        returns a string.
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        """Remove TRNRS open/close tags, and CLTID tags along with the word
        characters following them."""
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        """Insert a dummy ``0`` value for FITID and CHECKNUM tags that are
        not immediately followed by a word character or ``+``.

        The character right after the tag is part of the match and is
        replaced by ``0`` and a newline.
        """
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        return re.sub(expression % 'CHECKNUM', replacement % 'CHECKNUM',
                      ofc)

    def _translate_chknum_to_checknum(self, ofc):
        """
        Some banks put a CHKNUM instead of CHECKNUM. This method translates
        CHKNUM to CHECKNUM in order to parse this information correctly.
        """
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        """Add the ACCTSTMT wrapper and a dummy ACCTFROM block after
        ``<OFC>``, closing ACCTSTMT before ``</OFC>``.

        Text in which ``<OFC>`` is already followed by ``<ACCTSTMT>`` is
        returned untouched rather than yielding ``None``.
        """
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        if re.search(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc
        return ofc.replace('<OFC>', tags).replace('</OFC>',
                                                  '</ACCTSTMT>\n</OFC>')
Пример #5
0
class QifParser:
    """pyparsing grammar for QIF statements.

    ``__init__`` assembles the grammar into ``self.parser`` and ``parse``
    runs it over a QIF string.
    """

    def __init__(self, debug=False):
        """Build the QIF grammar.

        Args:
            debug: when truthy, attach the ofxtools debug actions to the
                top-level parser.
        """
        # Each table maps a one-character line prefix to the results name
        # under which the rest of that line is stored.
        account_items = {'N': "Name",
                         'T': "AccountType",
                         'D': "Description",
                         'L': "CreditLimit",
                         'X': "UnknownField",
                         'B': "Balance",
                         '/': "BalanceDate",
                         '$': "Balance"}

        noninvestment_items = {'D': "Date",
                               'T': "Amount",
                               'U': "Amount2",
                               'C': "Cleared",
                               'N': "Number",
                               'P': "Payee",
                               'M': "Memo",
                               'L': "Category",
                               'A': "Address",
                               'S': "SplitCategory",
                               'E': "SplitMemo",
                               '$': "SplitAmount",
                               '-': "NegativeSplitAmount"}

        investment_items = {'D': "Date",
                            'N': "Action",
                            'Y': "Security",
                            'I': "Price",
                            'Q': "Quantity",
                            'T': "Amount",
                            'C': "Cleared",
                            'P': "Text",
                            'M': "Memo",
                            'O': "Commission",
                            'L': "TransferAccount",
                            '$': "TransferAmount"}

        category_items = {'N': "Name",
                          'D': "Description",
                          'T': "TaxRelated",
                          'I': "IncomeCategory",
                          'E': "ExpenseCategory",
                          'B': "BudgetAmount",
                          'R': "TaxSchedule"}

        # NOTE(review): this table is never used; ``classlist`` below is
        # built from ``category_items``. Confirm which one is intended.
        class_items = {'N': "Name",
                       'D': "Description"}

        # '!Option:' lines are matched and dropped from the results.
        options = Group(CaselessLiteral('!Option:') + restOfLine).suppress()

        banktxns = Group(CaselessLiteral('!Type:Bank').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("BankTransactions")

        cashtxns = Group(CaselessLiteral('!Type:Cash').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("CashTransactions")

        # Both the '!Type:CCard' and '!Type!CCard' spellings are accepted.
        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        # NOTE(review): liabilities share the "CreditCardTransactions"
        # results name with ``ccardtxns``; kept as-is because consumers may
        # rely on it. Confirm whether that is deliberate.
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                              ZeroOrMore(Or([self._items(noninvestment_items),
                                             options]))
                              ).setResultsName("CreditCardTransactions")

        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")

        acctlist = Group(CaselessLiteral('!Account').suppress() +
                         ZeroOrMore(Or([self._items(account_items, name="AccountInfo")]))
                         ).setResultsName("AccountList")

        # NOTE(review): ``category`` and ``classlist`` are built but are not
        # part of ``self.parser`` below — confirm whether they should be.
        category = Group(CaselessLiteral('!Type:Cat').suppress() +
                         ZeroOrMore(self._items(category_items))
                         ).setResultsName("CategoryList")

        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(category_items))
                          ).setResultsName("ClassList")

        # Account lists and surrounding whitespace are parsed but
        # suppressed; only the transaction sections are kept.
        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns | liabilitytxns | invsttxns) +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")

        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Build the expression for one record made of ``items`` fields.

        Args:
            items: mapping of line prefix to results name for each field.
            name: results name given to the whole record group.

        Returns:
            A ``Group`` matching one or more field lines followed by the
            record terminator ('^' or '^EUR', stored as 'Currency').
        """
        # ``items()`` works on both Python 2 and 3 (``iteritems`` does not
        # exist on Python 3). The loop variable is deliberately not called
        # ``name`` so the record-name parameter is not overwritten.
        item_list = [self._item(code, field_name)
                     for code, field_name in items.items()]
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Build the expression for one field line: the suppressed prefix
        ``code``, the rest of the line stored as ``name``, and the
        suppressed line end."""
        return CaselessLiteral(code).suppress() + \
               restOfLine.setResultsName(name) + \
               LineEnd().suppress()

    def parse(self, qif):
        """Parse the QIF string ``qif`` and return the pyparsing results."""
        return self.parser.parseString(qif)
Пример #6
0
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    Inline closing tags are stripped before parsing; if the parse raises
    ``ParseException`` the text is repaired with ``fix_ofc`` and parsed once
    more.
    """

    def __init__(self, debug=False):
        """Build the grammar and store it on ``self.parser``.

        Args:
            debug: when truthy, attach the ofxtools debug actions to the
                top-level parser.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Content: an opening tag and its value up to '<' or a line break.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        # Aggregate: open tag, nested aggregates/content, close tag.
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns ``(open_tag, close_tag)`` when ``closed`` is true and just
        the opening-tag expression otherwise.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if closed:
            # Closing tag and trailing whitespace are matched but dropped.
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document.

        A ``ParseException`` from the first attempt triggers ``fix_ofc`` and
        one retry; an exception from the retry propagates to the caller.
        """
        ofc = self.remove_inline_closing_tags(ofc)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def remove_inline_closing_tags(self, ofc):
        """
        Fix an OFC by removing a closing 'tag' that follows other text on
        the same line.
        """
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """
        Repair a bad OFC: strip unwanted tags, fill empty FITID/CHKNUM tags
        and inject the ACCTSTMT wrapper if it is missing. Always returns a
        string.
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        """Remove TRNRS open/close tags, and CLTID tags along with the word
        characters following them."""
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        """Insert a dummy ``0`` value for FITID and CHKNUM tags that are not
        immediately followed by a word character or ``+``.

        The character right after the tag is part of the match and is
        replaced by ``0`` and a newline.
        """
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        return re.sub(expression % 'CHKNUM', replacement % 'CHKNUM', ofc)

    def _inject_tags(self, ofc):
        """Add the ACCTSTMT wrapper and a dummy ACCTFROM block after
        ``<OFC>``, closing ACCTSTMT before ``</OFC>``.

        Text in which ``<OFC>`` is already followed by ``<ACCTSTMT>`` is
        returned untouched rather than yielding ``None``.
        """
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        if re.search(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc
        return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')