Пример #1
0
    def __init__(self, app_info):
        """Populate ``self.attrs`` from a raw key/value record.

        Keys are normalised by dropping '(' and ')' and turning '/' into
        '_'.  Values equal to '' or '#REF!' are stored as None.  Remaining
        values are converted with ``_convert_type`` according to which of
        DATE_FIELDS, INT_FIELDS, FLOAT_FIELDS or BOOLEAN_FIELDS lists the
        normalised key; an 'n/a' (any case) in an int/float field is
        replaced by '0' first.  Values for unlisted keys are left alone on
        Python 3 and re-encoded from latin1 to utf-8 on Python 2.

        When both 'x-coordinate' and 'y-coordinate' end up non-None, the
        pair is passed to ``Coord`` and the two values returned by
        ``as_wgs84()`` are stored under 'lat' and 'lon'.

        :param app_info: mapping of field name to raw value; values are
            presumably strings (``.lower()`` is called on them) -- confirm
            against the caller.
        """
        self.logger = logging.getLogger(__name__)
        self.attrs = {}

        for raw_key in app_info.keys():
            value = app_info[raw_key]

            # Normalise the key so it no longer contains brackets or slashes.
            name = raw_key
            for unwanted, substitute in (('(', ''), (')', ''), ('/', '_')):
                name = name.replace(unwanted, substitute)

            if value in ['', '#REF!']:
                # NOTE(review): '#REF!' looks like a spreadsheet error
                # marker -- it is treated the same as an empty value.
                self.attrs[name] = None
                continue

            numeric_fields = self.INT_FIELDS + self.FLOAT_FIELDS
            if name in numeric_fields and value.lower() == 'n/a':
                value = '0'

            if name in self.DATE_FIELDS:
                value = _convert_type(value, 'date')
            elif name in self.INT_FIELDS:
                value = _convert_type(value, 'int')
            elif name in self.FLOAT_FIELDS:
                value = _convert_type(value, 'float')
            elif name in self.BOOLEAN_FIELDS:
                value = _convert_type(value, 'bool')
            elif sys.version_info < (3, 0):
                # Python 2 only: re-encode byte strings from latin1 to utf-8.
                value = value.decode('latin1').encode('utf-8')

            self.attrs[name] = value

        x_value = self.attrs.get('x-coordinate')
        y_value = self.attrs.get('y-coordinate')
        if x_value is None or y_value is None:
            return

        coord = Coord(x_value, y_value)
        self.attrs['lat'], self.attrs['lon'] = coord.as_wgs84()
Пример #2
0
 def _parse_content(self, content):
     """Parse an HTML page of auction results.

     Only tables whose first header cell reads exactly 'Auction Date' are
     used.  Each usable data row is appended to ``self.auctions`` as a dict
     with the keys 'date', 'average_price', 'lowest_price', 'total_volume',
     'co_fired_volume' and 'period' (year followed by two-digit month).
     Afterwards ``self.periods`` maps each period to its average price;
     when several auctions share a period their prices are averaged.

     :param content: HTML handed to ``html5lib.parse``.
     :returns: always True.
     """
     document = html5lib.parse(content,
                               treebuilder="lxml",
                               namespaceHTMLElements=False)
     for tbl in document.xpath('.//table'):
         ths = tbl.xpath('.//tr//th')
         if len(ths) == 0 or ths[0].text != 'Auction Date':
             continue
         # The first row is skipped (presumably the header row).
         for row in tbl.xpath('.//tr')[1:]:
             row_data = row.xpath('td')
             if len(row_data) == 0:
                 continue
             # An element's .text is None when it has no leading text, so
             # fall back to '' rather than crashing on .strip().
             date_text = (row_data[0].text or '').strip()
             if date_text == '':
                 continue
             # Remove lowercase letters that directly follow the leading
             # digits (e.g. an ordinal suffix) so strptime can parse the
             # date.  [A-Za-z] is used because the range [A-z] would also
             # match the punctuation characters between 'Z' and 'a'.
             str_good_date = sub(r'([0-9]*)[a-z]*( [A-Za-z]* [0-9]*)',
                                 r'\1\2', date_text)
             dtt = datetime.strptime(str_good_date, "%d %B %Y").date()
             auction_info = {
                 'date': dtt,
                 # [1:] drops the first character of the price text --
                 # presumably a currency symbol; TODO confirm.
                 'average_price': _convert_type(
                     row_data[1].text.strip()[1:], 'float'),
                 'lowest_price': _convert_type(
                     row_data[2].text.strip()[1:], 'float'),
                 'total_volume': _convert_type(
                     row_data[3].text or '0', 'int'),
                 'co_fired_volume': _convert_type(
                     row_data[4].text or '0', 'int'),
                 'period': "{}{:02d}".format(dtt.year, dtt.month),
             }
             self.auctions.append(auction_info)

     # Fold prices into self.periods: a lone price is stored directly, a
     # second price for the same period turns the entry into a list.
     # NOTE(review): self.auctions is not cleared in this method, so a
     # second call would fold earlier auctions in again -- confirm that
     # callers only parse once or reset state first.
     for info in self.auctions:
         period = info['period']
         price = info['average_price']
         if period not in self.periods:
             self.periods[period] = price
             continue
         previous = self.periods[period]
         if isinstance(previous, list):
             previous.append(price)
         else:
             self.periods[period] = [price, previous]

     # Collapse every list of prices into its mean.
     for key, prices in list(self.periods.items()):
         if isinstance(prices, list):
             self.periods[key] = sum(prices) / len(prices)
     return True
Пример #3
0
    def __init__(self, app_info):
        """Create the object from a raw record, filling ``self.attrs``.

        For every entry of ``app_info`` the key has '(' and ')' removed and
        '/' replaced by '_'.  A value of '' or '#REF!' is stored as None.
        Any other value is run through ``_convert_type`` with 'date',
        'int', 'float' or 'bool' depending on whether the cleaned key is in
        DATE_FIELDS, INT_FIELDS, FLOAT_FIELDS or BOOLEAN_FIELDS; before
        that, 'n/a' (case-insensitive) in an int/float field becomes '0'.
        Keys in none of those collections keep their value, except on
        Python 2 where it is re-encoded from latin1 to utf-8.

        If 'x-coordinate' and 'y-coordinate' are both non-None afterwards,
        they are wrapped in a ``Coord`` and the two values returned by
        ``as_wgs84()`` are stored as 'lat' and 'lon'.

        :param app_info: mapping of field name to raw value; the values
            are presumably strings since ``.lower()`` is called on them --
            verify against the caller.
        """
        self.logger = logging.getLogger(__name__)
        self.attrs = {}

        for source_key in app_info.keys():
            item = app_info[source_key]

            # Strip brackets and swap slashes for underscores in the key.
            field = source_key
            for old, new in (('(', ''), (')', ''), ('/', '_')):
                field = field.replace(old, new)

            if item in ['', '#REF!']:
                # NOTE(review): '#REF!' is presumably a spreadsheet error
                # value; it is stored as None just like an empty string.
                self.attrs[field] = None
                continue

            int_and_float = self.INT_FIELDS + self.FLOAT_FIELDS
            if field in int_and_float and item.lower() == 'n/a':
                item = '0'

            if field in self.DATE_FIELDS:
                item = _convert_type(item, 'date')
            elif field in self.INT_FIELDS:
                item = _convert_type(item, 'int')
            elif field in self.FLOAT_FIELDS:
                item = _convert_type(item, 'float')
            elif field in self.BOOLEAN_FIELDS:
                item = _convert_type(item, 'bool')
            elif sys.version_info < (3, 0):
                # Only reached on Python 2: latin1 bytes become utf-8 bytes.
                item = item.decode('latin1').encode('utf-8')

            self.attrs[field] = item

        x_coord = self.attrs.get('x-coordinate')
        y_coord = self.attrs.get('y-coordinate')
        if x_coord is None or y_coord is None:
            return

        coord = Coord(x_coord, y_coord)
        self.attrs['lat'], self.attrs['lon'] = coord.as_wgs84()
Пример #4
0
 def __init__(self, el):
     """Represent a single fuel record entry.

     The element is expected to look like:
       <FUEL TYPE="OTHER" IC="N" VAL="13528" PCT="1.6"/>

     TYPE and IC are stored as read; VAL and PCT are passed through
     ``_convert_type`` as 'int' and 'float' respectively.

     :param el: object whose ``get(name)`` returns the attribute value.
     """
     attribute = el.get
     self.type = attribute("TYPE")
     self.icr = attribute("IC")
     raw_val = attribute("VAL")
     self.val = _convert_type(raw_val, 'int')
     raw_pct = attribute("PCT")
     self.pct = _convert_type(raw_pct, 'float')
Пример #5
0
 def __init__(self, el):
     """Build a fuel record from one element.

     Example of the expected element:
       <FUEL TYPE="OTHER" IC="N" VAL="13528" PCT="1.6"/>

     ``type`` and ``icr`` hold the TYPE and IC attributes unchanged,
     while ``val`` and ``pct`` hold VAL and PCT after ``_convert_type``
     has been applied with 'int' and 'float'.

     :param el: object exposing ``get(name)`` for attribute lookup.
     """
     fuel_type = el.get("TYPE")
     interconnector_flag = el.get("IC")  # stored as-is; meaning not shown here
     self.type = fuel_type
     self.icr = interconnector_flag
     self.val = _convert_type(el.get("VAL"), 'int')
     self.pct = _convert_type(el.get("PCT"), 'float')
Пример #6
0
 def _extract_row_data(self, wbb, sht, rownum):
     """Read one sheet row and append it to ``self.units``.

     Columns 0-3 are copied as 'sett_id', 'ngc_id', 'name' and
     'reg_capacity'; column 4 goes through ``_mkdate`` as 'date_added';
     columns 5 and 6 go through ``_convert_type`` as 'bmunit' (bool) and
     'cap' (float).  Rows whose 'ngc_id' is the empty string are dropped.

     :param wbb: workbook object, only forwarded to ``_mkdate``.
     :param sht: sheet providing ``cell(row, col).value``.
     :param rownum: index of the row to read.
     :returns: None in every case.
     """
     def cell_value(column):
         return sht.cell(rownum, column).value

     # Values are gathered in column order, matching the dict layout below.
     sett_id = cell_value(0)
     ngc_id = cell_value(1)
     name = cell_value(2)
     reg_capacity = cell_value(3)
     date_added = _mkdate(wbb, sht, rownum, 4)
     bmunit = _convert_type(cell_value(5), 'bool')
     cap = _convert_type(cell_value(6), 'float')

     if ngc_id == '':
         return

     self.units.append({
         'sett_id': sett_id,
         'ngc_id': ngc_id,
         'name': name,
         'reg_capacity': reg_capacity,
         'date_added': date_added,
         'bmunit': bmunit,
         'cap': cap,
     })
Пример #7
0
 def _extract_row_data(self, wbb, sht, rownum):
     """Turn a single sheet row into a unit record.

     The record has the keys 'sett_id', 'ngc_id', 'name' and
     'reg_capacity' (columns 0-3, raw cell values), 'date_added'
     (column 4 via ``_mkdate``), 'bmunit' (column 5 via ``_convert_type``
     as 'bool') and 'cap' (column 6 via ``_convert_type`` as 'float').
     The record is appended to ``self.units`` unless its 'ngc_id' equals
     the empty string.

     :param wbb: workbook object, passed straight to ``_mkdate``.
     :param sht: sheet whose ``cell(row, col).value`` is read.
     :param rownum: row index within ``sht``.
     :returns: None, whether or not the row was kept.
     """
     record = {}
     plain_columns = ('sett_id', 'ngc_id', 'name', 'reg_capacity')
     for column, label in enumerate(plain_columns):
         record[label] = sht.cell(rownum, column).value
     record['date_added'] = _mkdate(wbb, sht, rownum, 4)
     record['bmunit'] = _convert_type(sht.cell(rownum, 5).value, 'bool')
     record['cap'] = _convert_type(sht.cell(rownum, 6).value, 'float')

     # Only rows with a non-empty ngc_id are recorded.
     if record['ngc_id'] != '':
         self.units.append(record)
Пример #8
0
 def _parse_content(self, content):
     """Parse an HTML page of auction results.

     Only tables whose first header cell reads exactly 'Auction Date' are
     used.  Each usable data row is appended to ``self.auctions`` as a dict
     with the keys 'date', 'average_price', 'lowest_price', 'total_volume',
     'co_fired_volume' and 'period' (year followed by two-digit month).
     Afterwards ``self.periods`` maps each period to its average price;
     when several auctions share a period their prices are averaged.

     :param content: HTML handed to ``html5lib.parse``.
     :returns: always True.
     """
     document = html5lib.parse(content,
                               treebuilder="lxml",
                               namespaceHTMLElements=False)
     for tbl in document.xpath('.//table'):
         ths = tbl.xpath('.//tr//th')
         if len(ths) == 0 or ths[0].text != 'Auction Date':
             continue
         # The first row is skipped (presumably the header row).
         for row in tbl.xpath('.//tr')[1:]:
             row_data = row.xpath('td')
             if len(row_data) == 0:
                 continue
             # An element's .text is None when it has no leading text, so
             # fall back to '' rather than crashing on .strip().
             date_text = (row_data[0].text or '').strip()
             if date_text == '':
                 continue
             # Remove lowercase letters that directly follow the leading
             # digits (e.g. an ordinal suffix) before date parsing.
             # [A-Za-z] is used because the range [A-z] would also match
             # the punctuation characters between 'Z' and 'a'.
             str_good_date = sub(r'([0-9]*)[a-z]*( [A-Za-z]* [0-9]*)',
                                 r'\1\2', date_text)
             dtt = parse_date_string(str_good_date)
             auction_info = {
                 'date': dtt,
                 # [1:] drops the first character of the price text --
                 # presumably a currency symbol; TODO confirm.
                 'average_price': _convert_type(
                     row_data[1].text.strip()[1:], 'float'),
                 'lowest_price': _convert_type(
                     row_data[2].text.strip()[1:], 'float'),
                 'total_volume': _convert_type(
                     row_data[3].text or '0', 'int'),
                 'co_fired_volume': _convert_type(
                     row_data[4].text or '0', 'int'),
                 'period': "{}{:02d}".format(dtt.year, dtt.month),
             }
             self.auctions.append(auction_info)

     # Fold prices into self.periods: a lone price is stored directly, a
     # second price for the same period turns the entry into a list.
     # NOTE(review): self.auctions is not cleared in this method, so a
     # second call would fold earlier auctions in again -- confirm that
     # callers only parse once or reset state first.
     for info in self.auctions:
         period = info['period']
         price = info['average_price']
         if period not in self.periods:
             self.periods[period] = price
             continue
         previous = self.periods[period]
         if isinstance(previous, list):
             previous.append(price)
         else:
             self.periods[period] = [price, previous]

     # Collapse every list of prices into its mean.
     for key, prices in list(self.periods.items()):
         if isinstance(prices, list):
             self.periods[key] = sum(prices) / len(prices)
     return True
Пример #9
0
def main():
    """Download Ofgem station and certificate data into an Excel workbook.

    Steps, in order:
      1. Parse the command line: ``start`` and ``end`` periods (YYYYMM),
         optional ``--filename`` and optional ``--stations``.
      2. Collect station names from ``--stations``, from the file named by
         ``args.input`` (when that option exists and is set) and from an
         interactive prompt that ends on a blank line.
      3. Run a ``StationSearch`` per name and pass every station found to
         ``add_station_sheet``.
      4. Run a ``CertificateSearch`` per station and pass successful
         results to ``add_certificate_sheet``.
      5. Save the workbook to the chosen filename.

    Calls ``sys.exit(0)`` when no station names were supplied.
    """
    parser = commandline_parser(
        'Download bulk information from Ofgem to produce an Excel spreadsheet')
    parser.add_argument('start', type=int, help='Period to start (YYYYMM)')
    parser.add_argument('end', type=int, help='Period to finish (YYYYMM)')
    parser.add_argument('--filename',
                        default='certificates.xls',
                        help='Filename to export to')
    parser.add_argument('--stations', nargs='*', help='Stations to search for')
    args = parser.parse_args()
    print(args)

    if not args.filename.endswith('.xls'):
        args.filename += '.xls'

    # Enumerate the first day of every month from start to end inclusive;
    # the list is only used to report how many periods are covered.
    periods = []
    start_dt = _convert_type(args.start, 'period')
    end_dt = _convert_type(args.end, 'period')
    for yyy in range(start_dt.year, end_dt.year + 1):
        mmm = start_dt.month if start_dt.year == yyy else 1
        mm2 = end_dt.month if end_dt.year == yyy else 12
        for mon in range(mmm, mm2 + 1):
            periods.append(date(yyy, mon, 1))

    print("Period covered will be {} to {}. A total of {} periods".
          format(start_dt.strftime("%b-%Y"),
                 end_dt.strftime("%b-%Y"),
                 len(periods)))

    stations = []
    station_names = list(args.stations or [])

    # NOTE(review): '--input' is not declared in this function; presumably
    # commandline_parser() adds it.  getattr keeps the script usable if not.
    input_path = getattr(args, 'input', None)
    if input_path is not None:
        with open(input_path) as fh:
            for line in fh:
                # Text after '#' is discarded; blank and comment-only
                # lines must not become empty station names.
                station = line.split('#', 1)[0].strip()
                if station:
                    station_names.append(station)

    if len(station_names) > 0:
        print("Station names to be searched for:")
        for stat in station_names:
            print("    - {}".format(stat))

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        station = read_line("Enter a station name (or blank to finish)")
        if station.strip() == '':
            break
        # Accept comma-separated names, ignoring empty fragments and
        # surrounding whitespace.
        for fragment in station.split(','):
            fragment = fragment.strip()
            if fragment:
                station_names.append(fragment)

    if len(station_names) == 0:
        print("No stations to process. Exiting...")
        sys.exit(0)

    print("\nSearching for stations...")
    for name in station_names:
        print("    - {}".format(name))
        sss = StationSearch()
        sss.start()
        if sss.filter_name(name) and sss.get_data():
            stations.extend(sss.stations)
            print("        found")
        else:
            print("        no stations found")

    print("A total of {} stations will be recorded".format(len(stations)))

    wbb = Workbook()
    add_station_sheet(wbb, stations)

    print("\nGetting certificate data (this is quicker)...")
    # NOTE(review): this dict is filled but never read in this function,
    # and it uses ocs.cert_list while the sheet uses ocs.certificates --
    # confirm which attribute is intended.
    certificates = {}
    for station in stations:
        print("    - {}".format(station.name))
        ocs = CertificateSearch()
        ocs.start()
        # Every filter step must succeed before the data is fetched.
        if (ocs.filter_generator_id(station.generator_id) and
                ocs.set_start_month(start_dt.month) and
                ocs.set_start_year(start_dt.year) and
                ocs.set_finish_month(end_dt.month) and
                ocs.set_finish_year(end_dt.year) and
                ocs.get_data()):
            certificates[station.name] = ocs.cert_list
            add_certificate_sheet(wbb, station, ocs.certificates)
            print("        added to spreadsheet")
        else:
            print("        nothing to add")

    wbb.save(args.filename)
    print("\nData saved to {}".format(args.filename))
Пример #10
0
def main():
    """Download Ofgem station and certificate data into an Excel workbook.

    Steps, in order:
      1. Parse the command line: ``start`` and ``end`` periods (YYYYMM),
         optional ``--filename`` and optional ``--stations``.
      2. Collect station names from ``--stations``, from the file named by
         ``args.input`` (when that option exists and is set) and from an
         interactive prompt that ends on a blank line.
      3. Run a ``StationSearch`` per name and pass every station found to
         ``add_station_sheet``.
      4. Run a ``CertificateSearch`` per station and pass successful
         results to ``add_certificate_sheet``.
      5. Save the workbook to the chosen filename.

    Calls ``sys.exit(0)`` when no station names were supplied.
    """
    parser = commandline_parser(
        'Download bulk information from Ofgem to produce an Excel spreadsheet')
    parser.add_argument('start', type=int, help='Period to start (YYYYMM)')
    parser.add_argument('end', type=int, help='Period to finish (YYYYMM)')
    parser.add_argument('--filename',
                        default='certificates.xls',
                        help='Filename to export to')
    parser.add_argument('--stations', nargs='*', help='Stations to search for')
    args = parser.parse_args()
    print(args)

    if not args.filename.endswith('.xls'):
        args.filename += '.xls'

    # Enumerate the first day of every month from start to end inclusive;
    # the list is only used to report how many periods are covered.
    periods = []
    start_dt = _convert_type(args.start, 'period')
    end_dt = _convert_type(args.end, 'period')
    for yyy in range(start_dt.year, end_dt.year + 1):
        mmm = start_dt.month if start_dt.year == yyy else 1
        mm2 = end_dt.month if end_dt.year == yyy else 12
        for mon in range(mmm, mm2 + 1):
            periods.append(date(yyy, mon, 1))

    print("Period covered will be {} to {}. A total of {} periods".format(
        start_dt.strftime("%b-%Y"), end_dt.strftime("%b-%Y"), len(periods)))

    stations = []
    station_names = list(args.stations or [])

    # NOTE(review): '--input' is not declared in this function; presumably
    # commandline_parser() adds it.  getattr keeps the script usable if not.
    input_path = getattr(args, 'input', None)
    if input_path is not None:
        with open(input_path) as fh:
            for line in fh:
                # Text after '#' is discarded; blank and comment-only
                # lines must not become empty station names.
                station = line.split('#', 1)[0].strip()
                if station:
                    station_names.append(station)

    if len(station_names) > 0:
        print("Station names to be searched for:")
        for stat in station_names:
            print("    - {}".format(stat))

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        station = read_line("Enter a station name (or blank to finish)")
        if station.strip() == '':
            break
        # Accept comma-separated names, ignoring empty fragments and
        # surrounding whitespace.
        for fragment in station.split(','):
            fragment = fragment.strip()
            if fragment:
                station_names.append(fragment)

    if len(station_names) == 0:
        print("No stations to process. Exiting...")
        sys.exit(0)

    print("\nSearching for stations...")
    for name in station_names:
        print("    - {}".format(name))
        sss = StationSearch()
        sss.start()
        if sss.filter_name(name) and sss.get_data():
            stations.extend(sss.stations)
            print("        found")
        else:
            print("        no stations found")

    print("A total of {} stations will be recorded".format(len(stations)))

    wbb = Workbook()
    add_station_sheet(wbb, stations)

    print("\nGetting certificate data (this is quicker)...")
    # NOTE(review): this dict is filled but never read in this function,
    # and it uses ocs.cert_list while the sheet uses ocs.certificates --
    # confirm which attribute is intended.
    certificates = {}
    for station in stations:
        print("    - {}".format(station.name))
        ocs = CertificateSearch()
        ocs.start()
        # Every filter step must succeed before the data is fetched.
        if (ocs.filter_generator_id(station.generator_id) and
                ocs.set_start_month(start_dt.month) and
                ocs.set_start_year(start_dt.year) and
                ocs.set_finish_month(end_dt.month) and
                ocs.set_finish_year(end_dt.year) and
                ocs.get_data()):
            certificates[station.name] = ocs.cert_list
            add_certificate_sheet(wbb, station, ocs.certificates)
            print("        added to spreadsheet")
        else:
            print("        nothing to add")

    wbb.save(args.filename)
    print("\nData saved to {}".format(args.filename))