Exemplo n.º 1
0
    def on_loaded(self):
        """Build ``self.operations`` from the operation rows of the document.

        Only ``<tr>`` elements whose ``class`` is ``hdoc1`` or ``hdotc1`` and
        that contain exactly four ``<td>`` cells are used.  The cells are read
        as: date, label, first amount, second amount.

        Amount cells are normalised by dropping every ``.`` and turning ``,``
        into ``.``.  If the first amount then contains exactly one ``.`` it is
        stored negated; otherwise the second amount is stored unchanged.

        Raises:
            ValueError: if the amount cell selected for a row cannot be
                converted to a float (including when it is empty).
        """
        self.operations = []

        for tr in self.document.iter('tr'):
            if tr.attrib.get('class', '') not in ('hdoc1', 'hdotc1'):
                continue
            tds = tr.findall('td')
            if len(tds) != 4:
                continue

            date = tds[0].text

            label_cell = tds[1]
            # ``text`` is None when the cell starts directly with a child
            # element, so fall back to an empty string before concatenating.
            label = (label_cell.text or u'').replace(u'\xa0', u'')
            for child in label_cell:
                if child.text:
                    label += child.text
                if child.tail:
                    label += child.tail
            if label_cell.tail:
                label += label_cell.tail
            label = label.strip()

            # An empty cell has ``text`` set to None; treat it as "no number".
            first_amount = (tds[2].text or u'').replace('.', '').replace(',', '.')
            # If we don't have exactly one '.', this is not a float: try the
            # next cell instead.
            if first_amount.count('.') == 1:
                amount = -float(first_amount)
            else:
                second_amount = (tds[3].text or u'').replace('.', '').replace(',', '.')
                amount = float(second_amount)

            operation = Operation(len(self.operations))
            operation.date = date
            operation.label = label
            operation.amount = amount
            self.operations.append(operation)
Exemplo n.º 2
0
    def get_history(self):
        """Return the list of operations found in the ``mouvements`` table.

        Each ``<tr>`` of the table body gives one Operation: the date is the
        text of the first cell, the label is the tag-stripped content of the
        second cell, and the amount is taken from the last ``<span>`` (inside
        any cell) whose text contains a digit.  Rows that have no such span
        are skipped.

        Raises:
            IndexError: if the document has no ``<table id="mouvements">``, or
                if a row carrying an amount has fewer than two cells.
        """
        mvt_table = self.document.xpath("//table[@id='mouvements']", smart_strings=False)[0]
        # Compiled once: the pattern does not depend on the row.
        has_digit = re.compile(r"\d+")

        operations = []

        for mvt in mvt_table.xpath("./tbody/tr"):
            # Keep the text of the last span that contains a digit.  A span
            # whose ``text`` is None (no leading text) cannot hold an amount.
            amount = None
            for span in mvt.xpath("./td/span"):
                if span.text and has_digit.search(span.text):
                    amount = span.text
            if amount is None:
                # No numeric span in this row: there is nothing to parse.
                continue

            cells = mvt.xpath("./td")
            operation = Operation(len(operations))
            operation.date = cells[0].text
            operation.label = remove_extra_spaces(remove_html_tags(self.browser.parser.tostring(cells[1])))

            # Drop '.' characters, use '.' as decimal mark and remove all
            # whitespace before converting.
            amount = "".join(amount.replace(".", "").replace(",", ".").split())
            if amount[0] == "-":
                operation.amount = -float(amount[1:])
            else:
                operation.amount = float(amount)

            operations.append(operation)
        return operations
Exemplo n.º 3
0
 def get_history(self):
     """Yield one Operation per history row of the document.

     A row is a ``<tr>`` whose first child has the class ``i g`` or ``p g``.
     The date is the text of that first child, the label is the text of the
     third child (newlines replaced by spaces), and the amount comes from the
     fourth child, or from the fifth when the fourth holds at most two
     characters; if both are that short the amount is 0.

     Only digits, ``-`` and ``,`` (converted to ``.``) of the amount text are
     kept before conversion.

     Raises:
         IndexError: if a matching row has too few children.
         ValueError: if the filtered amount text is not a valid float.
     """
     index = 0
     for tr in self.document.iter('tr'):
         cells = list(tr)
         if not cells:
             # An empty <tr> has no first cell to inspect.
             continue
         first_td = cells[0]
         if first_td.attrib.get('class', '') not in ('i g', 'p g'):
             continue

         operation = Operation(index)
         index += 1
         operation.date = first_td.text
         # ``text`` is None for an empty cell; treat it as an empty string.
         operation.label = (cells[2].text or u'').replace('\n', ' ')

         if len(cells[3].text or u'') > 2:
             s = cells[3].text
         elif len(cells[4].text or u'') > 2:
             s = cells[4].text
         else:
             s = "0"

         # Keep digits and the sign, and use '.' as the decimal mark.
         balance = u''.join(
             '.' if c == ',' else c
             for c in s
             if c.isdigit() or c in '-,'
         )
         operation.amount = float(balance)
         yield operation
Exemplo n.º 4
0
    def on_loaded(self):
        """Build ``self.operations`` from the operation rows of the document.

        Only ``<tr>`` elements whose ``class`` is ``hdoc1`` or ``hdotc1`` and
        that contain exactly three ``<td>`` cells are used.  The date is the
        ``name`` attribute of the first element nested in the first cell, the
        label is the text of the second cell, and the amount is the third
        cell's text with every ``.`` dropped and ``,`` turned into ``.``.

        Raises:
            IndexError: if the first cell of a matching row has no child.
            AttributeError: if the third cell of a matching row has no text.
            ValueError: if the amount text cannot be converted to a float.
        """
        self.operations = []

        for tr in self.document.iter('tr'):
            if tr.attrib.get('class', '') not in ('hdoc1', 'hdotc1'):
                continue
            tds = tr.findall('td')
            if len(tds) != 3:
                continue

            date = tds[0][0].attrib.get('name', '')

            label_cell = tds[1]
            # Only the cell's leading text has its non-breaking spaces
            # removed; text coming from child elements is appended untouched.
            label = (label_cell.text or u'').replace(u'\xa0', u'')
            for child in label_cell:
                if child.text:
                    label += child.text
                if child.tail:
                    label += child.tail
            if label_cell.tail:
                label += label_cell.tail
            label = label.strip()

            amount = tds[2].text.replace('.', '').replace(',', '.')

            operation = Operation(len(self.operations))
            operation.date = date
            operation.label = label
            operation.amount = float(amount)
            self.operations.append(operation)
Exemplo n.º 5
0
    def get_history(self, start_index=0):
        """
            Yield the operations of a specific account, numbered from
            start_index. Note that this function expects the current page to
            be the one dedicated to this history; when is_account_page() says
            it is not, nothing is yielded.

            Two layouts are handled. In the default one, each operation is a
            left-aligned div (date and label) followed by a right-aligned div
            (amount). In the other one, used when there are exactly twice as
            many left-aligned divs as right-aligned ones, each operation is
            three consecutive divs: date, amount, label.

            Missing data falls back to an amount of 0.0, a date of '01/01'
            and, in the default layout, a label of 'Unknown'.
        """
        # tested on CA Lorraine, Paris, Toulouse
        # avoid parsing the page as an account-dedicated page if it is not the case
        if not self.is_account_page():
            return

        # Compiled once; raw strings keep the backslash escapes intact.
        amount_re = re.compile(r'^(-?[0-9]+\.[0-9]{2}).*$')
        date_and_label_re = re.compile(r'^([012][0-9]|3[01])/(0[1-9]|1[012]).(.+)$')
        date_re = re.compile(r'^([012][0-9]|3[01])/(0[1-9]|1[012])')

        index = start_index
        # Operation currently being filled in; None until a first one exists.
        operation = None

        body_elmt_list = self.document.xpath('/html/body/*')

        # type of separator used in the page
        separators = 'hr'
        # How many <hr> elements do we have under the <body>?
        sep_expected = len(self.document.xpath('/html/body/hr'))
        if not sep_expected:
            # no <hr>? Then how many class-less <div> used as separators instead?
            sep_expected = len(self.document.xpath('/html/body/div[not(@class) and not(@style)]'))
            separators = 'div'

        # the interesting divs are after the separators
        interesting_divs = []
        right_div_count = 0
        left_div_count = 0
        sep_found = 0
        for body_elmt in body_elmt_list:
            if separators == 'hr' and body_elmt.tag == 'hr':
                sep_found += 1
            elif separators == 'div' and body_elmt.tag == 'div' and body_elmt.get('class', 'nope') == 'nope':
                # NOTE(review): sep_expected only counts divs lacking both
                # class and style, while this test only checks class --
                # confirm whether styled, class-less divs should count here.
                sep_found += 1
            elif sep_found >= sep_expected and body_elmt.tag == 'div':
                # we just want <div> with dv class and a style attribute
                if body_elmt.get('class', '') != 'dv' or body_elmt.get('style', 'nope') == 'nope':
                    continue
                interesting_divs.append(body_elmt)
                if self.is_right_aligned_div(body_elmt):
                    right_div_count += 1
                else:
                    left_div_count += 1

        # So, how are data laid out?
        toulouse_way_of_life = (left_div_count == 2 * right_div_count)
        # we'll have: one left-aligned div for the date, one right-aligned
        # div for the amount, and one left-aligned div for the label. Each time.

        if not toulouse_way_of_life:
            for body_elmt in interesting_divs:
                if self.is_right_aligned_div(body_elmt):
                    if operation is None:
                        # An amount line that is not preceded by any
                        # date/label line has no operation to attach to.
                        continue
                    # this is the second line of an operation entry, displaying the amount
                    data = self.extract_text(body_elmt).replace(',', '.').replace(' ', '')
                    matches = amount_re.findall(data)
                    operation.amount = float(matches[0]) if matches else 0.0
                    yield operation
                else:
                    # this is the first line of an operation entry, displaying the date and label
                    data = self.extract_text(body_elmt)
                    matches = date_and_label_re.findall(data)
                    operation = Operation(index)
                    index += 1
                    if matches:
                        operation.date = u'%s/%s' % (matches[0][0], matches[0][1])
                        operation.label = u'%s' % matches[0][2]
                    else:
                        operation.date = u'01/01'
                        operation.label = u'Unknown'
        else:
            # Floor division: range() needs an integer, and a plain "/" gives
            # a float on Python 3.
            for i in range(0, len(interesting_divs) // 3):
                date_div, amount_div, label_div = interesting_divs[i * 3:i * 3 + 3]
                operation = Operation(index)
                index += 1
                # amount
                data = self.extract_text(amount_div).replace(',', '.').replace(' ', '')
                matches = amount_re.findall(data)
                operation.amount = float(matches[0]) if matches else 0.0
                # date
                data = self.extract_text(date_div)
                matches = date_re.findall(data)
                operation.date = u'%s/%s' % (matches[0][0], matches[0][1]) if matches else u'01/01'
                # label
                data = self.extract_text(label_div)
                data = re.sub(' +', ' ', data)
                operation.label = u'%s' % data
                yield operation