Python format_fields примеры, utility_bill_scraper.format_fields Python примеры использования

Пример #1

0

Показать файл

Файл: kitchener_utilities.py Проект: ryanfobel/utility-bill-scraper

def get_water_and_sewer_charges(soup):
    """Build a per-service table of consumption, rate and charges.

    The anchor is the first div whose markup mentions 'Consumption' but not
    'Total Consumption'. The divs that follow it (as siblings) supply, in
    order: the time period plus service names, the consumption values, the
    rates (two siblings further on) and the charges.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find_all`` API.

    Returns:
        dict with a u'Time period' entry plus one entry per service name,
        each mapping to a dict with u'Consumption', u'Rate' and u'Charges'.

    Raises:
        IndexError: if no anchor div is found or a column is shorter than
            the list of service names.
    """
    def is_consumption_header(tag):
        if tag.name != u'div':
            return False
        markup = tag.decode()
        return 'Consumption' in markup and 'Total Consumption' not in markup

    header_div = soup.find_all(is_consumption_header)[0]

    # First sibling: time period followed by the service names.
    # NOTE(review): format_fields is defined elsewhere; assumed to return a
    # list of field values -- confirm.
    names_div = header_div.next_sibling
    labels = format_fields(names_div.contents[0])
    table = {u'Time period': labels[0]}
    services = labels[1:]

    # Second sibling: consumption values.
    consumption_div = names_div.next_sibling
    consumption = format_fields(consumption_div.contents[0])

    # The sibling right after the consumption div is skipped; rates are in
    # the one after it, and charges follow the rates.
    rates_div = consumption_div.next_sibling.next_sibling
    rates = format_fields(rates_div.contents[0])
    charges_div = rates_div.next_sibling
    charges = format_fields(charges_div.contents[0])

    for idx, service in enumerate(services):
        table[service] = {
            u'Consumption': consumption[idx],
            u'Rate': rates[idx],
            u'Charges': charges[idx],
        }
    return table

Пример #2

0

Показать файл

Файл: kitchener_utilities.py Проект: ryanfobel/utility-bill-scraper

def get_water_consumption(soup):
    """Extract every 'Total Consumption' section that has three fields.

    For each matching div, the ``top:<N>px`` coordinate is read from its
    markup, all divs sharing that coordinate are collected, and the fields
    of the first such div are paired with the fields of the third.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find_all`` API.

    Returns:
        list of dicts, one per consumption section found (possibly empty).

    Raises:
        AttributeError: if a matching div carries no ``top:<N>px`` position.
        IndexError: if fewer than three divs share the same top coordinate.
    """
    def find_total_consumption(tag):
        return tag.name == u'div' and tag.decode().find(
            'Total Consumption') >= 0

    div_list = soup.find_all(find_total_consumption)

    # Find the div containing 3 fields (gas has an extra
    # 'Billing Conversion Multiplier'). Note that it is possible to have
    # more than one consumption section.
    tags = [x for x in div_list if len(format_fields(x.contents[0])) == 3]

    consumption = []

    for tag in tags:
        # Extract the top pixel coordinate. The pattern is a raw string so
        # that ``\d`` reaches the regex engine instead of being treated as
        # an (invalid) string escape.
        match = re.search(r'top:(?P<top>\d+)px', tag.decode())
        top_marker = 'top:%spx' % match.group('top')

        # Match all divs with the same top pixel coordinate. The parameter
        # is named differently from the loop variable to avoid shadowing.
        def find_matching_top(candidate):
            return candidate.name == u'div' and candidate.decode().find(
                top_marker) >= 0

        divs = [
            format_fields(x.contents[0])
            for x in soup.find_all(find_matching_top)
        ]
        # Pair the first div's fields (keys) with the third div's (values).
        consumption.append(dict(zip(divs[0], divs[2])))
    return consumption

Пример #3

0

Показать файл

Файл: kitchener_utilities.py Проект: ryanfobel/utility-bill-scraper

def get_summary(soup):
    """Assemble the account summary plus the water and gas charges.

    The summary field names come from the first span mentioning
    'Your Account Summary' (its first field is skipped); the values come
    from the sibling that follows the first div mentioning 'SEQ-ID'.
    Water and gas charges are then looked up by label.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find``/``find_all`` API.

    Returns:
        dict of summary fields, plus u'Water Charges' and u'Gas Charges'.

    Raises:
        IndexError: if an expected span/div is not found.
        AttributeError: if a charges label is missing or has no
            ``top:<N>px`` position.
    """
    def find_seq_id(tag):
        return tag.name == u'div' and tag.decode().find('SEQ-ID') >= 0

    def find_account_summary(tag):
        return tag.name == u'span' and tag.decode(). \
            find('Your Account Summary') >= 0

    summary_fields = format_fields(
        soup.find_all(find_account_summary)[0].contents)
    summary_data = format_fields(
        soup.find_all(find_seq_id)[0].next_sibling.contents[0].contents)

    # The first summary field is dropped before pairing names with values.
    summary_dict = dict(zip(summary_fields[1:], summary_data))

    def find_charges(name):
        """Return the first field of the second element sharing the
        top coordinate of the div whose markup contains *name*."""
        def find_matching_div(tag):
            return tag.name == u'div' and tag.decode().find(name) >= 0

        tag = soup.find(find_matching_div)

        # Extract the top pixel coordinate. Raw string: ``\d`` must reach
        # the regex engine rather than be parsed as a string escape.
        match = re.search(r'top:(?P<top>\d+)px', tag.decode())
        top = match.group('top')

        # Find the second element with the same top pixel coordinate.
        same_row = soup.find_all(style=re.compile('top:%spx' % top))
        return format_fields(same_row[1].span.contents)[0]

    summary_dict[u'Water Charges'] = find_charges('Water charges')
    summary_dict[u'Gas Charges'] = find_charges('Gas charges')

    return summary_dict

Пример #4

0

Показать файл

Файл: kitchener_wilmot_hydro.py Проект: ryanfobel/utility-bill-scraper

def get_amount_due(soup):
    """Return the amount due printed closest to the 'New Charges' label.

    Candidate divs are those whose ``left:<N>px`` position lies between
    120 and 132 inclusive; among them, the one whose ``top:<N>px`` is
    nearest to the 'New Charges' div is used (the first one wins on ties).

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find``/``find_all`` API.

    Returns:
        The first field of the selected div. When that field contains
        'CR', the text before 'CR' is converted to a negative float;
        otherwise the field is returned as produced by ``format_fields``.
        NOTE(review): the return type therefore differs between the two
        cases -- confirm callers expect this.

    Raises:
        AttributeError: if the 'New Charges' div is missing or a div has
            no ``top:<N>px`` position.
        ValueError: if no candidate div is found.
    """
    # Raw strings so that ``\d`` is a regex token, not a string escape.
    top_re = re.compile(r'top:(?P<top>\d+)px')
    left_re = re.compile(r'left:(?P<left>\d+)px')

    def find_new_charges(tag):
        return tag.name == u'div' and tag.decode().find('New Charges') >= 0

    tag = soup.find(find_new_charges)
    top = float(top_re.search(tag.decode()).group('top'))

    def find_left_pos(tag):
        if tag.name == u'div':
            match = left_re.search(tag.decode())
            if match:
                return 120 <= float(match.group('left')) <= 132
        return False

    def distance_from_label(tag):
        # Vertical distance between this div and the 'New Charges' label.
        return abs(float(top_re.search(tag.decode()).group('top')) - top)

    # min() returns the first element with the smallest key and raises
    # ValueError on an empty sequence, so no array comparison is needed.
    closest = min(soup.find_all(find_left_pos), key=distance_from_label)
    amount_due = format_fields(closest.span)[0]

    # A trailing 'CR' marks a credit: report it as a negative number.
    index = str(amount_due).find('CR')
    if index >= 0:
        amount_due = (-1) * float(amount_due[:index])

    return amount_due

Пример #5

0

Показать файл

def get_summary(soup):
    """Collect the meter summary, bill date and amount due.

    The values are read from the span inside the sibling that follows the
    first div mentioning 'Gas used this period'; they are paired
    positionally with a fixed list of field names.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find`` API.

    Returns:
        dict keyed by field name, plus u'Bill Date' and u'Amount Due'.
    """
    def is_gas_used_div(tag):
        if tag.name != u'div':
            return False
        return 'Gas used this period' in tag.decode()

    anchor = soup.find(is_gas_used_div)
    values = format_fields(anchor.next_sibling.span.contents)

    # Dynamic discovery of field names (flattening format_fields() over
    # each child of the anchor div) was failing, so the names are
    # hard-coded for now.
    labels = [
        u'Meter Number',
        u'Estimated Reading',
        u'Previous Reading',
        u'Gas used this period',
        u'PEF Value',
        u'Adjusted volume',
    ]

    summary = dict(zip(labels, values))
    summary[u'Bill Date'] = get_bill_date(soup)
    summary[u'Amount Due'] = get_amount_due(soup)
    return summary

Пример #6

0

Показать файл

def get_amount_due(soup):
    """Return the amount printed to the right of 'Amount due now'.

    The last div mentioning 'Amount due now' is the anchor. The result is
    taken from the first div that lies entirely to the right of the anchor
    and whose top or bottom edge falls inside the anchor's vertical extent.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find``/``find_all`` API.

    Returns:
        The first field of the selected div's span with its first
        character removed (presumably a currency symbol -- confirm).

    Raises:
        IndexError: if no 'Amount due now' div exists.
        AttributeError: if the anchor has no parsable position or no div
            is found on the same line.
    """
    # Raw strings so that ``\d`` is a regex token, not a string escape.
    # Compiled once because it is applied to every div in the document.
    pos_re = re.compile(r'left:(?P<left>\d+)px.*top:(?P<top>\d+)px.*'
                        r'width:(?P<width>\d+)px.*height:(?P<height>\d+)')

    def find_amount_due_now(tag):
        return tag.name == u'div' and tag.decode().find('Amount due now') >= 0

    tag = soup.find_all(find_amount_due_now)[-1]
    pos = pos_re.search(tag.decode()).groupdict()
    pos = {k: int(v) for (k, v) in pos.items()}
    pos['bottom'] = pos['top'] + pos['height']
    pos['right'] = pos['left'] + pos['width']

    def find_divs_on_same_line(tag):
        if tag.name == u'div':
            match = pos_re.search(tag.decode())
            if match:
                box = match.groupdict()
                top = int(box['top'])
                bottom = top + int(box['height'])
                left = int(box['left'])
                # Must start to the right of the anchor and have its top
                # or bottom edge within the anchor's vertical range.
                return (left > pos['right'] and
                        (pos['top'] <= top <= pos['bottom'] or
                         pos['top'] <= bottom <= pos['bottom']))
        return False

    return format_fields(
        soup.find(find_divs_on_same_line).span.contents)[0][1:]

Пример #7

0

Показать файл

Файл: kitchener_utilities.py Проект: ryanfobel/utility-bill-scraper

def get_gas_rates(soup):
    """Map each variable gas charge name to its rate.

    The anchor is the first div mentioning 'Gas Fixed Delivery Charge'.
    Its fields (minus the first) are the charge names; the rates are read
    from the third sibling after the anchor. Names containing 'HST' or
    'Fixed' are left out before the names are paired with the rates.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find_all`` API.

    Returns:
        dict mapping '<charge name> Rate' to the corresponding rate field.

    Raises:
        IndexError: if the anchor div is not found.
    """
    def find_gas_rates(tag):
        return tag.name == u'div' and tag.decode().find(
            'Gas Fixed Delivery Charge') >= 0

    gas_div = soup.find_all(find_gas_rates)[0]

    # Drop the first field of the anchor div; the rest are charge names.
    gas_fields = format_fields(gas_div.contents[0])[1:]
    gas_rates = format_fields(
        gas_div.next_sibling.next_sibling.next_sibling.contents[0])

    # The charges column (fourth sibling) was previously parsed here but
    # never used; it is no longer read, so its absence cannot fail the
    # rate lookup.
    rate_names = [
        x + ' Rate' for x in gas_fields
        if 'HST' not in x and 'Fixed' not in x
    ]
    return dict(zip(rate_names, gas_rates))

Пример #8

0

Показать файл

Файл: kitchener_wilmot_hydro.py Проект: ryanfobel/utility-bill-scraper

def get_billing_date(soup):
    """Return the billing date as an ISO-8601 date string.

    The date text is the first field of the span inside the second sibling
    after the first div mentioning 'BILLING DATE'. It is expected to look
    like ``<UPPERCASE MONTH> <day> <year>``.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find_all`` API.

    Returns:
        str: the date formatted by ``date.isoformat()``.

    Raises:
        IndexError: if no 'BILLING DATE' div exists.
        AttributeError: if the date text does not match the pattern.
    """
    def find_billing(tag):
        return tag.name == u'div' and tag.decode().find('BILLING DATE') >= 0

    date_text = format_fields(
        soup.find_all(find_billing)
        [0].next_sibling.next_sibling.span.contents)[0]

    # Raw string so that ``\s`` and ``\d`` are regex tokens rather than
    # (invalid) string escapes.
    match = re.search(r'([A-Z]+)\s+(\d+)\s+(\d+)', date_text)
    month, day, year = match.groups()

    # NOTE(review): the 'DD' token presumably requires a two-digit day;
    # confirm bills never print single-digit days without a leading zero.
    return arrow.get("%s %s %s" % (month, day, year),
                     'MMM DD YYYY').date().isoformat()

Пример #9

0

Показать файл

Файл: kitchener_utilities.py Проект: ryanfobel/utility-bill-scraper

    def find_charges(name):
        """Return the first field of the second element that shares the
        top coordinate of the div whose markup contains *name*.

        Relies on ``soup`` from the enclosing scope.

        Raises:
            AttributeError: if no div contains *name* or it carries no
                ``top:<N>px`` position.
            IndexError: if fewer than two elements share that coordinate.
        """
        def find_matching_div(tag):
            return tag.name == u'div' and tag.decode().find(name) >= 0

        tag = soup.find(find_matching_div)

        # Extract the top pixel coordinate. Raw string: ``\d`` must reach
        # the regex engine rather than be parsed as a string escape.
        match = re.search(r'top:(?P<top>\d+)px', tag.decode())
        top = match.group('top')

        # Find the second element with the same top pixel coordinate.
        same_row = soup.find_all(style=re.compile('top:%spx' % top))
        return format_fields(same_row[1].span.contents)[0]

Пример #10

0

Показать файл

Файл: kitchener_wilmot_hydro.py Проект: ryanfobel/utility-bill-scraper

def get_electricity_rates(soup):
    """Return the electricity rates printed beside 'kWh Off Peak'.

    The divs sharing the ``top:<N>px`` coordinate of the first div
    mentioning 'kWh Off Peak' are scanned; the first one whose leading
    field starts with 'at $' supplies the rates.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find``/``find_all`` API.

    Returns:
        dict pairing 'off peak', 'on peak' and 'mid peak' (in that order)
        with the rates in the order they are listed.
        NOTE(review): this assumes the bill lists rates as off/on/mid --
        confirm against a sample bill.

    Raises:
        AttributeError: if the 'kWh Off Peak' div is missing or has no
            ``top:<N>px`` position.
        ValueError: if no div on that line holds 'at $' fields, or a rate
            is not numeric.
    """
    def find_kWhOffPeak(tag):
        return tag.name == u'div' and tag.decode().find('kWh Off Peak') >= 0

    tag = soup.find(find_kWhOffPeak)
    # Raw string so that ``\d`` is a regex token, not a string escape.
    match = re.search(r'top:(?P<top>\d+)px', tag.decode())
    top_marker = 'top:%spx' % match.group('top')

    # match all divs with the same top pixel coordinate
    def find_matching_top(tag):
        return tag.name == u'div' and tag.decode().find(top_marker) >= 0

    for div in soup.find_all(find_matching_top):
        fields = format_fields(div.span)
        if len(fields) > 0 and str(fields[0]).startswith('at $'):
            # Strip the 4-character 'at $' prefix from every field.
            rates = [float(field[4:]) for field in fields]
            break
    else:
        # Previously this fell through to an UnboundLocalError on `rates`.
        raise ValueError("no 'at $' rate fields found next to 'kWh Off Peak'")

    return dict(zip(['off peak', 'on peak', 'mid peak'], rates))

Пример #11

0

Показать файл

Файл: kitchener_wilmot_hydro.py Проект: ryanfobel/utility-bill-scraper

def get_electricity_consumption(soup):
    """Total the kWh consumed in each time-of-use period.

    Fields are gathered from every span whose markup mentions one of the
    three 'kWh ... Peak' labels. For each field, the text preceding the
    first matching label (checked in off/mid/on order) is parsed as a
    float and added to that period's total.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find_all`` API.

    Returns:
        dict with keys 'off peak', 'mid peak' and 'on peak'; a total stays
        at integer 0 when no field matched that period.
    """
    periods = (
        ('off peak', 'kWh Off Peak'),
        ('mid peak', 'kWh Mid Peak'),
        ('on peak', 'kWh On Peak'),
    )

    def find_kWh(tag):
        if tag.name != u'span':
            return False
        markup = tag.decode()
        return any(label in markup for _, label in periods)

    entries = []
    for span in soup.find_all(find_kWh):
        entries.extend(format_fields(span))

    totals = {key: 0 for key, _ in periods}

    for entry in entries:
        for key, label in periods:
            cut = entry.find(label)
            # A label at position 0 has no number in front of it, so only
            # strictly positive positions count as a match.
            if cut > 0:
                totals[key] += float(entry[:cut])
                break

    return totals

Пример #12

0

Показать файл

def get_bill_date(soup):
    """Return the bill date field from the 'Bill Date' div.

    The value is the first field produced from the contents of the second
    child of the first div whose markup mentions 'Bill Date'.

    Args:
        soup: parsed bill document offering a BeautifulSoup-style
            ``find`` API.
    """
    def is_bill_date_div(tag):
        if tag.name != u'div':
            return False
        return 'Bill Date' in tag.decode()

    bill_date_div = soup.find(is_bill_date_div)
    value_node = bill_date_div.contents[1]
    fields = format_fields(value_node.contents)
    return fields[0]

Python format_fields примеры использования