Exemplo n.º 1
0
def run():
    """Build the Somerset mobile library stops file.

    Reads stop rows from the CSV at DATA_SOURCE (the header row is skipped),
    looks each stop's postcode up on api.postcodes.io to obtain coordinates,
    and hands the collected rows to create_mobile_library_file, which is
    called with the file name 'somerset.csv'.
    """

    timetable = 'https://www.somerset.gov.uk/libraries-leisure-and-communities/libraries/library-facilities/mobile-library/'
    mobiles = []

    with open(DATA_SOURCE, 'r') as som_raw:
        mobreader = csv.reader(som_raw, delimiter=',', quotechar='"')
        next(mobreader, None)  # skip the headers
        for row in mobreader:

            mobile_library = 'Mobile'
            frequency = 'FREQ=WEEKLY;INTERVAL=4'
            route = row[0].strip()
            community = row[1].strip()
            stop_name = row[2].strip()
            address = stop_name + ', ' + community
            postcode = row[3].strip()
            # Any '.' in the time columns is rewritten to ':'
            arrival = row[4].strip().replace('.', ':')
            departure = row[5].strip().replace('.', ':')
            day = row[6].strip()
            start = row[7].strip()

            # The lookup URL was previously never assigned, so the request
            # below raised NameError on the first data row.
            url = 'https://api.postcodes.io/postcodes/' + postcode
            postcode_request = requests.get(url)
            postcode_data = json.loads(postcode_request.text)
            latitude = postcode_data['result']['latitude']
            longitude = postcode_data['result']['longitude']

            mobiles.append([
                mobile_library, route, community, stop_name, address, postcode,
                longitude, latitude, day, 'Public', arrival, departure,
                frequency, start, '', '', timetable
            ])

    create_mobile_library_file('Somerset', 'somerset.csv', mobiles)
Exemplo n.º 2
0
def run():
    """Convert the Portsmouth stops CSV into a mobile library file.

    Every data row of DATA_SOURCE (header skipped) becomes one output row;
    the first column is used both as the route and as the day. The rows are
    passed to create_mobile_library_file with the name 'portsmouth.csv'.
    """
    timetable_url = 'https://www.portsmouth.gov.uk/ext/libraries/mobile-library'
    output_rows = []

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # discard the header row

        for record in records:
            # Column 0 feeds both the route and the day fields
            route_and_day = record[0].strip()
            arrival = record[1].strip()
            departure = record[2].strip()
            stop_name = record[3].strip()
            community = record[4].strip()
            latitude = record[5].strip()
            longitude = record[6].strip()
            start = record[7].strip()

            output_rows.append([
                'Mobile',
                route_and_day,
                community,
                stop_name,
                stop_name + ', ' + community,
                '',  # no postcode is available in this source
                longitude,
                latitude,
                route_and_day,
                'Public',
                arrival,
                departure,
                'FREQ=WEEKLY;INTERVAL=1',
                start,
                '',
                '',
                timetable_url,
            ])

    create_mobile_library_file('Portsmouth', 'portsmouth.csv', output_rows)
Exemplo n.º 3
0
def run():
    """Convert the Milton Keynes stops CSV into a mobile library file.

    For each data row of DATA_SOURCE (header skipped) the postcode is looked
    up on api.postcodes.io to get latitude and longitude. The first column is
    written to both the route and the day positions of the output row.
    """
    timetable_url = 'https://www.milton-keynes.gov.uk/libraries/about-libraries/mobile-library-service'
    lookup_base = 'https://api.postcodes.io/postcodes/'
    output_rows = []

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # discard the header row

        for record in records:
            day = record[0]
            arrival = record[1]
            departure = record[2]
            community = record[3]
            stop_name = record[4]
            postcode = record[5]
            start = record[6]

            # One web request per stop to geocode the postcode
            response = requests.get(lookup_base + postcode)
            lookup = json.loads(response.text)
            latitude = lookup['result']['latitude']
            longitude = lookup['result']['longitude']

            output_rows.append([
                'Mobile',
                day,
                community,
                stop_name,
                stop_name + ', ' + community,
                postcode,
                longitude,
                latitude,
                day,
                'Public',
                arrival,
                departure,
                'FREQ=WEEKLY;INTERVAL=1',
                start,
                '',
                '',
                timetable_url,
            ])

    create_mobile_library_file('Milton Keynes', 'milton_keynes.csv', output_rows)
def run():
    """Convert the North Yorkshire stops CSV into a mobile library file.

    Each data row of DATA_SOURCE (header skipped) already carries its own
    coordinates, so no geocoding is done. The first column is written to both
    the route and the day positions of the output row.
    """
    timetable_url = 'https://www.northyorks.gov.uk/supermobile-library'
    output_rows = []

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # discard the header row

        for record in records:
            day = record[0]
            community = record[1]
            stop_name = record[2]
            arrival = record[3]
            departure = record[4]
            start = record[5]
            latitude = record[6]
            longitude = record[7]

            output_rows.append([
                'Supermobile',
                day,
                community,
                stop_name,
                stop_name + ', ' + community,
                '',  # no postcode is available in this source
                longitude,
                latitude,
                day,
                'Public',
                arrival,
                departure,
                'FREQ=WEEKLY;INTERVAL=2',
                start,
                '',
                '',
                timetable_url,
            ])

    create_mobile_library_file(
        'North Yorkshire', 'north_yorkshire.csv', output_rows)
def run():
    """Convert the Birmingham stops CSV into a mobile library file.

    Every row of DATA_SOURCE is treated as a stop (no header row is skipped).
    Frequency, start date and route are taken directly from the row.
    """
    timetable_url = 'https://www.birmingham.gov.uk/info/50163/library_services/1479/mobile_library_service/3'
    output_rows = []

    with open(DATA_SOURCE, 'r') as source_file:
        for record in csv.reader(source_file, delimiter=',', quotechar='"'):
            day = record[0]
            arrival = record[1]
            departure = record[2]
            community = record[3]
            stop_name = record[4]
            latitude = record[5]
            longitude = record[6]
            frequency = record[7]
            start = record[8]
            route = record[9]

            output_rows.append([
                'Mobile',
                route,
                community,
                stop_name,
                stop_name + ', ' + community,
                '',
                longitude,
                latitude,
                day,
                'Public',
                arrival,
                departure,
                frequency,
                start,
                '',
                '',
                timetable_url,
            ])

    create_mobile_library_file('Birmingham', 'birmingham.csv', output_rows)
Exemplo n.º 6
0
def run():
    """Build the Norfolk mobile library stops file.

    Features from DATA_SOURCE_1 and DATA_SOURCE_2 are combined, sorted by
    stop number and de-duplicated. Community, date and times are extracted
    from each feature's DETAILS text with DETAILS_RE, and the start date is
    the parsed date minus four_weeks.
    """
    combined = read_data(DATA_SOURCE_1) + read_data(DATA_SOURCE_2)
    ordered = sorted(combined,
                     key=lambda item: item['attributes']['STOP_NUMBE'])

    # Keep a feature only when no equal feature appears later in the list
    unique_features = []
    for position, candidate in enumerate(ordered):
        if candidate in ordered[position + 1:]:
            continue
        unique_features.append(candidate)

    stops = []

    for feature in unique_features:
        attributes = feature['attributes']
        route = attributes['ROUTE_NUMB']
        stop_name = attributes['STOP_NAME'].title()
        details = attributes['DETAILS']

        geometry = feature['geometry']
        longitude = geometry['x']
        latitude = geometry['y']

        # The route code is split into a 3-character vehicle prefix and a
        # 3-character route number
        vehicle = route[0:3]
        route_number = route[3:6]
        if vehicle == 'CEN':
            vehicle = 'CEN' + CEN_Routes[route]

        # Details look like:
        # Mobile Library Route Number EDE305 will next visit NORTH ELMHAM, ORCHARD CLOSE No.20 on 18/09/2019 Arrival 16:10 Departure 16:25
        parsed = re.search(DETAILS_RE, details)

        community = parsed.group(2).title()
        visit_date = datetime.strptime(parsed.group(4),
                                       '%d/%m/%Y') - four_weeks
        # Only the leading HH:MM of each time is kept
        arrival = parsed.group(5)[0:5]
        departure = parsed.group(6)[0:5]

        stops.append([
            vehicle,
            route,
            community,
            stop_name,
            stop_name + ', ' + community,
            '',
            longitude,
            latitude,
            visit_date.strftime('%A'),
            'Public',
            arrival,
            departure,
            'FREQ=WEEKLY;INTERVAL=4',
            visit_date.strftime('%Y-%m-%d'),
            '',
            '',
            TIMETABLE + vehicle + '-' + route_number + '.pdf',
        ])

    create_mobile_library_file('Norfolk', 'norfolk.csv', stops)
Exemplo n.º 7
0
def run():
    """Build the Bath and North East Somerset mobile library stops file.

    Each feature from DATA_SOURCE supplies route, stop, village, day and a
    'start-end' time string. Coordinates are treated as EPSG:27700 and
    reprojected to EPSG:4326. Features whose village is 'LIBRARY DEPOT' are
    left out of the output.
    """
    features = read_data(DATA_SOURCE)

    # First visit date for each route
    route_start_dates = {
        "Route 1": "2019-10-14",
        "Route 2": "2019-10-15",
        "Route 3": "2019-10-16",
        "Route 4": "2019-10-17",
        "Route 5": "2019-10-18",
        "Route 6": "2019-10-21",
        "Route 7": "2019-10-22",
        "Route 8": "2019-10-23",
        "Route 9": "2019-10-24"
    }
    stops = []

    for feature in features:
        fields = feature['properties']['fields']

        route = fields['day_number'].replace('Day', 'Route')
        stop_name = fields['stop']
        community = fields['village']
        if stop_name == '':
            # Fall back to the village name when the stop is blank
            stop_name = community
        is_depot = community == 'LIBRARY DEPOT'

        address = stop_name + ', ' + community
        day = fields['day']
        time_parts = fields['time'].split('-')
        arrival = time_parts[0].strip().replace('.', ':')
        departure = time_parts[-1].strip().replace('.', ':')
        # The depot has no entry in the start-date table
        start = '' if is_depot else route_start_dates[route]

        first_coordinate = feature['geometry']['coordinates'][0]
        easting = first_coordinate[0]
        northing = first_coordinate[1]

        location = geopandas.GeoSeries(
            [Point(float(easting), float(northing))])
        location.crs = {'init': 'epsg:27700'}
        location = location.to_crs(epsg=4326)
        longitude = str(location[0].x)
        latitude = str(location[0].y)

        if is_depot:
            continue

        stops.append([
            'Mobile', route, community, stop_name, address, '',
            longitude, latitude, day, 'Public', arrival, departure,
            'FREQ=WEEKLY;INTERVAL=2', start, '', '', TIMETABLE
        ])

    create_mobile_library_file(
        'Bath and North East Somerset', 'bath_and_north_east_somerset.csv',
        stops)
def run():
    """Build the North Lincolnshire mobile library stops file.

    Reads the 'features' list from the JSON file at DATA_SOURCE. Each
    feature's route id selects a start date from a fixed table; route 12
    uses a weekly frequency and every other route a three-weekly one.
    """
    timetable_url = 'https://www.northlincs.gov.uk/schools-libraries-and-learning/libraries/the-mobile-library/'

    # Start date (day/month/year) keyed by route id
    route_start_dates = {
        12: '8/01/2019',
        11: '21/01/2019',
        13: '23/01/2019',
        14: '24/01/2019',
        15: '25/01/2019',
        16: '26/01/2019',
        21: '28/01/2019',
        23: '9/01/2019',
        24: '10/01/2019',
        26: '12/01/2019',
        31: '14/01/2019',
        33: '16/01/2019',
        34: '17/01/2019',
        35: '18/01/2019',
        36: '19/01/2019'
    }

    with open(DATA_SOURCE) as data_file:
        features = json.load(data_file)['features']

    stops = []
    for feature in features:
        properties = feature['properties']

        route_id = properties['id']
        if route_id == 12:
            frequency = 'FREQ=WEEKLY;INTERVAL=1'
        else:
            frequency = 'FREQ=WEEKLY;INTERVAL=3'

        day = properties['day']
        # The 'addr' property serves as both the address and the stop name
        location = properties['addr']
        # Only the first five characters of each time are kept
        arrival = properties['time1'][:5]
        departure = properties['time2'][:5]
        community = properties['name']

        coordinates = feature['geometry']['coordinates']
        longitude = coordinates[0]
        latitude = coordinates[1]

        first_visit = datetime.strptime(route_start_dates[route_id], '%d/%m/%Y')

        stops.append([
            'Mobile', 'Route ' + str(route_id), community, location, location,
            '', longitude, latitude, day, 'Public', arrival, departure,
            frequency, first_visit.strftime('%Y-%m-%d'), '', '', timetable_url
        ])

    create_mobile_library_file(
        'North Lincolnshire', 'north_lincolnshire.csv', stops)
def run():
    """Build the Perth and Kinross mobile library stops file from KML.

    Each kml:Folder under the kml:Document is one mobile library and each
    kml:Placemark inside it is one stop. Times, route and date are pulled
    out of the '<br>'-separated placemark description.
    """
    kml_ns = {'kml': 'http://www.opengis.net/kml/2.2'}
    timetable_url = 'https://www.culturepk.org.uk/libraries/services-in-the-community/mobile-library-service/'

    document = ET.parse(DATA_SOURCE).getroot().find('kml:Document', kml_ns)

    stops = []
    for folder in document.findall('kml:Folder', kml_ns):
        library_name = folder.find('kml:name', kml_ns).text

        for placemark in folder.findall('kml:Placemark', kml_ns):
            # The placemark name is "<stop> - <community>"
            address = placemark.find('kml:name', kml_ns).text
            name_parts = address.split(' - ')
            stop_name = name_parts[0]
            community = name_parts[-1]

            description = placemark.find('kml:description', kml_ns).text.strip()
            sections = description.split('<br>')

            # First section holds the arrival and departure times
            time_match = re.search(r'(\d{2}:\d{2}).*(\d{2}:\d{2})', sections[0])
            arrival = time_match.group(1)
            departure = time_match.group(2)
            route = sections[1]

            # NOTE: visit_date is only assigned when a section contains
            # '-Mar'; otherwise the value from an earlier stop is reused.
            for section in sections:
                if '-Mar' in section:
                    day_and_month = section.split(',')[1].strip()
                    visit_date = datetime.strptime(
                        day_and_month + ' 2019', '%d-%b %Y')

            point_text = placemark.find('kml:Point', kml_ns).find(
                'kml:coordinates', kml_ns).text.strip()
            coordinate_fields = point_text.split(',')
            longitude = coordinate_fields[0]
            latitude = coordinate_fields[1]

            stops.append([
                library_name, route, community, stop_name, address, '',
                longitude, latitude, visit_date.strftime('%A'), 'Public',
                arrival, departure, 'FREQ=WEEKLY;INTERVAL=2',
                visit_date.strftime('%Y-%m-%d'), '', '', timetable_url
            ])

    create_mobile_library_file(
        'Perth and Kinross', 'perth_and_kinross.csv', stops)
def run():
    """Split the shared stops CSV into North Somerset and South Glos files.

    Rows whose route is 'Thursday 1' are written to the South
    Gloucestershire file; every other row goes to the North Somerset file.
    Easting/northing values are treated as EPSG:27700 and reprojected to
    EPSG:4326.
    """
    north_somerset_url = 'https://www.n-somerset.gov.uk/my-services/leisure/libraries/bringing-the-library-to-you/mobile-library/'
    south_glos_url = 'https://www.southglos.gov.uk/libraries/mobile-library-almondsbury-severn-beach-pilning/'
    north_somerset_stops = []
    south_glos_stops = []

    with open(DATA_SOURCE, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # discard the header row

        for record in records:
            route = record[0].strip()
            community = record[1].strip()
            stop_name = record[2].strip()
            day = record[4].strip()
            arrival = record[5].strip()
            departure = record[6].strip()
            start = record[7].strip()

            easting = float(record[8].strip())
            northing = float(record[9].strip())
            location = geopandas.GeoSeries([Point(easting, northing)])
            location.crs = {'init': 'epsg:27700'}
            location = location.to_crs({'init': 'epsg:4326'})
            longitude = str(location[0].x)
            latitude = str(location[0].y)

            # Pick the destination list and its timetable link together
            if route == 'Thursday 1':
                destination, timetable_url = south_glos_stops, south_glos_url
            else:
                destination, timetable_url = north_somerset_stops, north_somerset_url

            destination.append([
                'Mobile', route, community, stop_name,
                stop_name + ', ' + community, '',
                longitude, latitude, day, 'Public', arrival, departure,
                'FREQ=WEEKLY;INTERVAL=2', start, '', '', timetable_url
            ])

    create_mobile_library_file('North Somerset', 'north_somerset.csv',
                               north_somerset_stops)
    create_mobile_library_file('South Gloucestershire',
                               'south_gloucestershire.csv', south_glos_stops)
Exemplo n.º 11
0
def run():
    """Build the West Berkshire mobile library stops file.

    Features from DATA_SOURCE are processed in ROUTE order. Times come from
    the TIME attribute and the start date from the first 'Oct: <day>' entry
    in DATES (year fixed to 2019); the weekday is derived from that date.
    """
    ordered_features = sorted(
        read_data(DATA_SOURCE),
        key=lambda item: item['attributes']['ROUTE'])

    stops = []

    for feature in ordered_features:
        attributes = feature['attributes']

        # Only the first character of the route is used
        route = attributes['ROUTE'][0:1]
        time_match = re.search(r'(\d{2}.\d{2}).*(\d{2}.\d{2})',
                               attributes['TIME'])
        arrival = time_match.group(1).replace('.', ':')
        departure = time_match.group(2).replace('.', ':')
        # Read here but replaced below by the weekday of the parsed date
        day = attributes['DAY']
        community = attributes['VILLAGE']
        stop_name = attributes['STOP']
        address = attributes['NAME']
        date_text = attributes['DATES']

        geometry = feature['geometry']
        longitude = geometry['x']
        latitude = geometry['y']

        october_match = re.search(r'Oct: (\d{1,2})', date_text)
        first_visit = datetime.strptime(
            october_match.group(1) + ' Oct 2019', '%d %b %Y')
        day = first_visit.strftime('%A')

        stops.append([
            'Mobile', route, community, stop_name, address, '', longitude,
            latitude, day, 'Public', arrival, departure,
            'FREQ=WEEKLY;INTERVAL=3', first_visit.strftime('%Y-%m-%d'), '',
            '', TIMETABLE
        ])

    create_mobile_library_file('West Berkshire', 'west_berkshire.csv', stops)
Exemplo n.º 12
0
def run():
    """Build the York mobile library stops file.

    Features from DATA_SOURCE are processed in ADDRESS order. The ADDRESS
    attribute is used for community, stop name and address alike; route,
    day, times and start date are written as empty strings.
    """
    ordered_features = sorted(
        read_data(DATA_SOURCE),
        key=lambda item: item['attributes']['ADDRESS'])

    stops = []

    for feature in ordered_features:
        # The source only provides a single address string per stop
        location = feature['attributes']['ADDRESS']
        geometry = feature['geometry']

        stops.append([
            'Mobile',
            '',  # route
            location,
            location,
            location,
            '',
            geometry['x'],
            geometry['y'],
            '',  # day
            'Public',
            '',  # arrival
            '',  # departure
            'FREQ=WEEKLY;INTERVAL=2',
            '',  # start
            '',
            '',
            TIMETABLE,
        ])

    create_mobile_library_file('York', 'york.csv', stops)
def run():
    """Build the Shropshire mobile library stops file.

    Features from DATA_SOURCE are processed in (Route, Stop) order. The
    Place attribute is split on ';' into community (first part) and stop
    name (last part). The Date1 attribute is divided by 1000 and read as a
    UTC timestamp to give the start date and weekday.
    """
    # Imported locally so the block does not depend on a file-level import
    from datetime import timezone

    features = read_data(DATA_SOURCE)
    features_sorted = sorted(
        features,
        key=lambda k: (k['attributes']['Route'], k['attributes']['Stop']))

    mobiles = []
    frequency = 'FREQ=WEEKLY;INTERVAL=2'

    for feature in features_sorted:

        attributes = feature['attributes']
        route = attributes['Route']

        # Title-casing turns "'s" into "'S", so that is undone afterwards
        place_parts = attributes['Place'].split(';')
        stop_name = place_parts[-1].strip().title().replace("'S", "'s")
        community = place_parts[0].strip().title().replace("'S", "'s")
        address = stop_name + ', ' + community

        # datetime.utcfromtimestamp is deprecated since Python 3.12; an
        # aware UTC datetime yields the same calendar date and weekday.
        date = datetime.fromtimestamp(
            int(attributes['Date1']) / 1000, tz=timezone.utc)
        start = date.strftime('%Y-%m-%d')
        day = date.strftime('%A')
        arrival = attributes['From_']
        departure = attributes['To']
        mobile_name = attributes['Fullname'].replace(
            ' Mobile Library', '')

        longitude = feature['geometry']['x']
        latitude = feature['geometry']['y']

        mobiles.append([
            mobile_name, route, community, stop_name, address, '', longitude,
            latitude, day, 'Public', arrival, departure, frequency, start, '',
            '', TIMETABLE
        ])

    create_mobile_library_file('Shropshire', 'shropshire.csv', mobiles)
def run():
    """Build the Aberdeenshire mobile library stops file.

    DATA_SOURCE is a JSON file of nested lists, one entry per mobile library.
    The positional indexes used below follow that nesting exactly; the stop
    attributes (Day, Arrival, Departure, Week, Dates) are key/value pairs.
    """
    timetable_base = 'https://www.livelifeaberdeenshire.org.uk/media/'
    # Timetable PDF suffix appended when the keyword is in the library name
    timetable_suffixes = (
        ("North", '2808/mobile-north-timetable-160119.pdf'),
        ("Central", '2807/mobile-central-timetable-160119.pdf'),
        ("South", '2809/mobile-south-timetable-160119.pdf'),
    )

    with open(DATA_SOURCE, encoding='utf-8') as data_file:
        libraries = json.load(data_file)

    output_rows = []
    for library in libraries:
        library_name = library[2].replace(
            'Mobile', '').replace('Library', '').strip()

        timetable = timetable_base
        for keyword, suffix in timetable_suffixes:
            if keyword in library_name:
                timetable = timetable + suffix

        # The stops are in an array
        for stop in library[12][0][13][0]:
            position = stop[1][0][0]
            coord_y = position[0]
            coord_x = position[1]

            raw_name = stop[5][0][1][0].replace('\n', '').strip()

            day = ''
            start = ''
            arrival = ''
            departure = ''
            week = ''
            date_list = ''

            for attribute in stop[5][3]:
                label = attribute[0]
                value = attribute[1][0].replace('\n', '').strip()
                if label == 'Day':
                    day = value
                elif label == 'Arrival':
                    arrival = value
                elif label == 'Departure':
                    departure = value
                elif label == 'Week':
                    week = value.replace(' & ', '/').replace('Weeks ', '')
                elif label == 'Dates':
                    date_list = value

            if date_list != '':
                # e.g. January 22 should be 2019-01-22
                first_date = datetime.strptime(
                    date_list.split(',')[0] + ' 2019', '%B %d %Y')
                start = first_date.strftime('%Y-%m-%d')

            route = library_name + ' ' + day + ' ' + week

            # Names may be "<community> - <stop>"; otherwise the whole name
            # is used for both
            name_parts = raw_name.split(' - ')
            community = name_parts[0]
            stop_name = name_parts[1] if len(name_parts) > 1 else raw_name

            output_rows.append([
                library_name, route, community, stop_name,
                stop_name + ', ' + community, '', coord_x, coord_y,
                day, 'Public', arrival, departure, 'FREQ=WEEKLY;INTERVAL=2',
                start, '', '', timetable
            ])

    create_mobile_library_file('Aberdeenshire', 'aberdeenshire.csv', output_rows)
def run():
    """Runs the main script.

    Fetches the stop listing from WEBSITE, then fetches each stop's own page
    and reads road, location, town, area, day and times from its LBH_Table.
    Values from the stop page override those parsed from the listing.
    """

    mobiles = []
    mobile_library = 'Mobile'

    # A single web call listing stops
    stop_list_html = requests.get(WEBSITE)
    stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

    # The map items are '|'-separated records; the first two entries are
    # skipped and every remaining entry is treated as a stop.
    stops = stop_list_soup.find('div', {'class': 'LBH_MapItems'}).text.split('|')[2:]

    for stop in stops:

        # Each record is a '~'-separated list of fields
        stop_data = stop.split('~')

        latitude = stop_data[1]
        longitude = stop_data[2]
        road = stop_data[5]
        day = stop_data[8]
        address = stop_data[7].lstrip(';').replace(';;', ', ')
        stop_name = address.split(', ')[0]
        community = address.split(', ')[0]
        timetable = 'https://archive.hillingdon.gov.uk' + stop_data[9]
        town = community

        # Reset per stop: without this a page lacking the Arrive/Depart rows
        # raised NameError on the first stop or silently reused the previous
        # stop's times.
        arrival = ''
        departure = ''

        # Now use the stop URL to get the rest of the data (cleaner)
        stop_html = requests.get(timetable)
        stop_soup = BeautifulSoup(stop_html.text, 'html.parser')
        stop_table = stop_soup.find('table', {'class': 'LBH_Table'})

        for row in stop_table.find_all('tr'):

            header_cell = row.find('th')
            if header_cell is None:
                # A row without a header cell has nothing to match against
                continue
            header = header_cell.text
            cells = row.find_all('td')

            if 'Road' in header:
                road = cells[0].text.strip()

            if 'Location' in header:
                stop_name = cells[0].text.strip()

            if 'Town' in header:
                town = cells[0].text.strip()

            if 'Area' in header:
                community = cells[0].text.strip()

            if 'Day' in header:
                day = cells[0].text.strip()

            if 'Arrive' in header:
                arrival = cells[0].text.strip()

            if 'Depart' in header:
                departure = cells[0].text.strip()

        address = stop_name + ', ' + road + ', ' + community + ', ' + town

        route = day  # will need to manually correct routes after
        start = ''  # will manually set start dates later

        mobiles.append(
            [mobile_library, route, community, stop_name, address, '', longitude, latitude,
                day, 'Public', arrival, departure, 'FREQ=WEEKLY', start, '', '', timetable]
        )

    create_mobile_library_file('Hillingdon', 'hillingdon.csv', mobiles)
Exemplo n.º 16
0
def run():
    """Build the Cambridgeshire mobile library stops file.

    Each data row of DATA_SOURCE_STOPS (header skipped) gives a week-of-month
    (either one value or two joined by '&') and a weekday. These are turned
    into a 'FREQ=MONTHLY;BYDAY=...' rule, and the start date is the first
    matching occurrence on or after today. Easting/northing values are
    treated as EPSG:27700 and reprojected to EPSG:4326. Rows that are exact
    duplicates of a later row are dropped before writing.
    """
    timetable_url = 'https://www.cambridgeshire.gov.uk/residents/libraries-leisure-&-culture/libraries/mobiles/find-a-mobile-library-stop/'

    # Weekday name -> [dateutil weekday constant, two-letter rule code]
    day_codes = {
        'Monday': [MO, 'MO'],
        'Tuesday': [TU, 'TU'],
        'Wednesday': [WE, 'WE'],
        'Thursday': [TH, 'TH'],
        'Friday': [FR, 'FR'],
        'Saturday': [SA, 'SA'],
        'Sunday': [SU, 'SU']
    }

    collected = []

    with open(DATA_SOURCE_STOPS, 'r') as source_file:
        records = csv.reader(source_file, delimiter=',', quotechar='"')
        next(records, None)  # discard the header row

        for record in records:
            library_name = record[0].strip()
            route = record[1].strip()
            week_text = record[2].strip()
            day = record[3].strip()
            community = record[5].strip()
            stop_name = record[6].strip()
            postcode = record[7].strip()
            easting = record[9].strip()
            northing = record[10].strip()
            # Only the first five characters of each time are kept
            arrival = record[11][0:5].strip()
            departure = record[12][0:5].strip()

            # The leading digit of each week entry is the week of the month
            if '&' in week_text:
                pieces = week_text.split('&')
                week_positions = (int(pieces[0].strip()[:1]),
                                  int(pieces[1].strip()[:1]))
                ordinals = week_positions
            else:
                week_positions = int(week_text[:1])
                ordinals = (week_positions,)

            weekday_constant, weekday_code = day_codes[day]
            repeat_rule = 'FREQ=MONTHLY;BYDAY=' + ','.join(
                str(ordinal) + weekday_code for ordinal in ordinals)

            first_visit = rrule(freq=MONTHLY, dtstart=date.today(),
                                bysetpos=week_positions,
                                byweekday=weekday_constant, count=1)[0]

            location = geopandas.GeoSeries(
                [Point(float(easting), float(northing))])
            location.crs = {'init': 'epsg:27700'}
            location = location.to_crs(epsg=4326)
            longitude = str(location[0].x)
            latitude = str(location[0].y)

            collected.append([
                library_name, route, community, stop_name,
                stop_name + ', ' + community, postcode, longitude, latitude,
                day, 'Public', arrival, departure, repeat_rule,
                first_visit.strftime('%Y-%m-%d'), '', '', timetable_url
            ])

    # Keep a row only when no identical row appears later
    unique_rows = []
    for position, candidate in enumerate(collected):
        if candidate not in collected[position + 1:]:
            unique_rows.append(candidate)

    create_mobile_library_file('Cambridgeshire', 'cambridgeshire.csv', unique_rows)
Exemplo n.º 17
0
def _to_24_hour(digits):
    """Turn a 1-4 digit time such as '2', '10', '930' or '1015' into HH:MM.

    One or two digits are read as hours only; three or four digits as hours
    followed by two minute digits. Hours below 8 have 12 added to them.
    """
    hours = '00'
    minutes = '00'
    if len(digits) in (1, 2):
        hours = digits.rjust(2, '0')
    elif len(digits) == 3:
        hours = digits[0:1].rjust(2, '0')
        minutes = digits[1:3]
    elif len(digits) == 4:
        hours = digits[0:2]
        minutes = digits[2:4]

    if int(hours) < 8:
        hours = str(int(hours) + 12)

    return hours + ':' + minutes


def run():
    """Runs the main script.

    Walks listing pages 1-4 under WEBSITE + DATA_SOURCE, follows every stop
    link in the fourth <ul> of each page, and reads day, times, address and
    postcode from the first table on the stop page.
    """

    mobiles = []
    mobile_library = 'Mobile 1'

    pages = [1, 2, 3, 4]

    # Start date for each weekday
    dates = {
        "Monday": "2019-05-20",
        "Tuesday": "2019-05-21",
        "Wednesday": "2019-05-22",
        "Thursday": "2019-05-23",
        "Friday": "2019-05-24",
    }

    # Abbreviations as they appear on the stop pages
    day_names = {
        'Mon': 'Monday',
        'Tues': 'Tuesday',
        'Wed': 'Wednesday',
        'Thurs': 'Thursday',
        'Fri': 'Friday',
    }

    # Compiled once; the digit pattern is a raw string so that '\d' is not
    # an invalid string escape.
    day_matcher = re.compile('(Mon|Tues|Wed|Thurs|Fri)')
    times_matcher = re.compile(r'\d{1,4}')

    for page in pages:

        # A single web page listing stops
        stop_list_html = requests.get(WEBSITE + DATA_SOURCE + str(page))
        stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

        # For each stop get the stop details
        # Being lazy here and just taking the 4th list - can refine another time
        for link in stop_list_soup.find_all('ul')[3].find_all('a'):

            stop_name = link.string
            stop_url = link.get('href')
            stop_html = requests.get(WEBSITE + stop_url)
            stop_soup = BeautifulSoup(stop_html.text, 'html.parser')

            # Reset every field per stop so a page with a missing row cannot
            # raise NameError or inherit the previous stop's values.
            community = ''
            arrival = ''
            departure = ''
            day = ''
            route = ''
            address = ''
            postcode = ''
            start = ''

            # The marker value is "<latitude>,<longitude>"
            location = stop_soup.find(
                id='map_marker_location_197').get('value')
            longitude = location.split(',')[1]
            latitude = location.split(',')[0]
            timetable = WEBSITE + stop_url

            data_table = stop_soup.find_all('table')[0]
            for row in data_table.find_all('tr'):
                row_header = row.find('th').string.strip()

                if 'Day' in row_header:
                    # Dots are removed so '9.30' is read as the digits '930'
                    times = row.find('td').contents[0].replace('.', '')
                    day = day_names[day_matcher.search(times).group(1)]

                    times_matches = times_matcher.findall(times)
                    if len(times_matches) > 0:
                        arrival = _to_24_hour(times_matches[0])
                    if len(times_matches) > 1:
                        departure = _to_24_hour(times_matches[1])

                    route = day
                    start = dates[day]

                if 'Address' in row_header:
                    address = row.find('td').contents[0].strip()

                if 'Postcode' in row_header:
                    postcode = row.find('td').contents[0].strip()

            mobiles.append(
                [mobile_library, route, community, stop_name, address, postcode, longitude, latitude,
                 day, 'Public', arrival, departure, 'FREQ=WEEKLY', start, '', '', timetable]
            )

    create_mobile_library_file('Edinburgh', 'edinburgh.csv', mobiles)
Exemplo n.º 18
0
def run():
    """Scrape the Suffolk mobile library route pages and write 'suffolk.csv'.

    The route index (WEBSITE + ROUTES) is searched for list items whose link
    title contains 'Mobile Library Route'. Each table row (after the header
    row) on a route page becomes one stop. Coordinates are taken from the
    manual ``locations`` table when the address is listed there; otherwise a
    geocoding request is made to NOM_URL, restricted to BOUNDS. The collected
    rows are handed to create_mobile_library_file().
    """

    # manual locations: address -> [latitude, longitude]
    locations = {
        'Glendale, Culford': [52.295329, 0.693698],
        'Village Hall, Risby': [52.265824, 0.634437],
        'Tutelina Road, Great Welnetham': [52.205614, 0.750686],
        'Walnut Tree Cottage, West Stow': [52.334333, 0.521259],
        'The Green, Honington': [52.336852, 0.806826],
        'Bus Shelter, Boxstead Row': [52.139440, 0.677079],
        'Village Hall, Brockley': [52.159501, 0.667900],
        'Top of the Hill, Hartest': [52.135100, 0.687976],
        'Community Hall, Whepstead': [52.193932, 0.681194],
        'Three Ways, Whepstead': [52.194667, 0.673889],
        'Village Hall, Ousden': [52.204576, 0.553058],
        'Church, Ousden': [52.207750, 0.538848],
        'Chequers, Gazely': [52.247857, 0.517684],
        'Needham Hall, Needham Street': [52.263197, 0.520293],
        'The School, Tuddenham St Mary': [52.313586, 0.547267],
        'Opp. 14 Cavenham Road, Tuddenham St Mary': [52.312273, 0.550346],
        'Bishopscroft, Barningham': [52.356090, 0.890150],
        'Honey Pot Lane, Wattisfield': [52.320952, 0.951440],
        'Village Hall, Wattisfield': [52.330484, 0.948689],
        '109 Bury Road, Great Thurlow': [52.126156, 0.456243],
        'Meadow Drive, Horringer': [52.222825, 0.673222],
        'Church, Cotton': [52.261369, 1.032773],
        'Methodist Church, Cotton': [52.264101, 1.019484],
        'Village Hall, Norton': [52.257070, 0.866380],
        'Church Road, Newton Green': [52.036365, 0.796942],
        'Flint Cottages, Smallbridge': [52.173870, 0.794328],
        'Half Moon, Hepworth': [52.335184, 0.913022],
        'Ivy Nook, Beck Street, Hepworth': [52.346656, 0.920684],
        'Stanton Stores, Stanton': [52.323876, 0.884184],
        'Hilltop, Stanton': [52.333412, 0.880192],
        'Community Centre, Walsham Le Willows': [52.302602, 0.935680],
        'Town House Road, Walsham Le Willows': [52.300731, 0.943665],
        'Pumping Station, Cavendish': [52.090559, 0.640543],
        'Church, Cavendish': [52.087629, 0.632805],
        'The Chapel, Lawshall': [52.154713, 0.733109],
        'Swanfields, Lawshall': [52.155029, 0.727303],
        'Village Hall, Lawshall': [52.159793, 0.714417],
        'Village Hall, Buxhall': [52.180829, 0.915938],
        'Green Farm Cottage, Thorpe Morieux': [52.145783, 0.836211],
        'Village Hall, Thorpe Morieux': [52.148876, 0.837022],
        'Primary School, Boxford': [52.027547, 0.859810],
        'Bridge Farm Day Nursery, Martlesham': [52.081405, 1.288248],
        'Falcon Mobile Home Park, Martlesham': [52.064742, 1.281813],
        'Douglas Bader, Martlesham Heath': [52.060970, 1.272164],
        'Village Hall, Bentley': [51.989546, 1.071395],
        'Whissels Farm, Creeting St Mary': [52.177112, 1.085009],
        'Village Hall, Creeting St Mary': [52.172051, 1.070963],
        'Church, Little Stonham': [52.199084, 1.089106],
        'Magpie Inn, Little Stonham': [52.201534, 1.099858],
        'The Green, Higham': [52.260693, 0.557358],
        'Fenn View, Washbrook': [52.034945, 1.078822],
        'Church, Battisford': [52.149227, 1.001806],
        'Battisford Pre School, Battisford': [52.146993, 0.980069],
        'Farnish House, Botesdale': [52.341407, 1.006361],
        'Village Hall, Botesdale': [52.345144, 1.008976],
        'Backhills, Botesdale': [52.345331, 1.006312],
        'Broom Knoll, East Bergholt': [51.971927, 1.061666],
        'South View, East Bergholt': [51.976061, 1.055943],
        'Red Lion, East Bergholt': [51.971688, 1.011434],
        'Paddock Way, Bildeston': [52.105056, 0.910917],
        'White Horse, Hitcham': [52.124151, 0.894430],
        'Village Hall, Hitcham': [52.127769, 0.900729],
        'The Swan car park, Hoxne': [52.349545, 1.199516],
        'St Edmunds House, Hoxne': [52.339660, 1.208437],
        'Fish and Chip shop, Mendlesham': [52.251120, 1.082581],
        'Recreation ground, Mendlesham': [52.248412, 1.078985],
        'The Green, Mendlesham': [52.226707, 1.069991],
        'Lay-by, Wickham Skeith': [52.281808, 1.071562],
        'Swan House, Wickham Skeith': [52.286511, 1.064710],
        'Moorlands, Hollesley': [52.054773, 1.430125],
        'Shepherd & Dog PH, Hollesley': [52.050942, 1.430870],
        'Harewood House, Hollesley': [52.039702, 1.412796],
        'Orchardleigh, North Cove': [52.446345, 1.629062],
        'Old Post Office, Mutford': [52.437027, 1.650539],
        'Hartismere House, Laxfield': [52.300566, 1.363823],
        'New Dawn, Chediston': [52.354000, 1.450951],
        'Midsummer Cottage, Chediston': [52.348039, 1.459990],
        'Nursery, Rendlesham': [52.126116, 1.414609],
        'Village Hall, Wenhaston': [52.323520, 1.558421],
        'Ashburnham Way (Co-op car park), Carlton Colville':
        [52.457034, 1.703553],
        'Green, Saxtead': [52.231834, 1.299388],
        'Foxearth Nursing Home, Saxtead': [52.247412, 1.297638],
        'Bell Inn, Middleton': [52.255023, 1.557760],
        'Mulberry Bush nursery, Eye': [52.329782, 1.144394],
        'Hartismere House, Eye': [52.322536, 1.139949],
        'Spring Park, Otley': [52.157872, 1.229017],
        'Village Stores, Otley': [52.151381, 1.220365],
        'Village Hall, Snape': [52.170943, 1.500930],
        'Near pond, Lound': [52.547929, 1.682302],
        'Lound Hall, Lound': [52.530358, 1.702772],
        # NOTE(review): identical to 'Lound Hall, Lound' above - looks like a
        # copy/paste slip; confirm the real position before relying on it.
        'Village Way, Waldringfield': [52.530358, 1.702772]
    }

    mobiles = []
    coordinates = []

    # Bounding box used to reject geocoder hits outside the area of interest;
    # compared as (bbox[0] <= lon <= bbox[2]) and (bbox[1] <= lat <= bbox[3]).
    bbox = [float(bound) for bound in BOUNDS.split(',')]

    route_list_html = requests.get(WEBSITE + ROUTES)
    route_list_soup = BeautifulSoup(route_list_html.text, 'html.parser')

    route_links = []
    for li in route_list_soup.find_all('li'):
        anchor = li.find('a')
        if anchor is None:
            continue
        link_title = anchor.get('title')
        if link_title is None or 'Mobile Library Route' not in link_title:
            continue
        # The visible text is split on spaces: words 0-1 name the route and
        # word 2 is the day.
        words = li.text.strip().split(' ')
        route_links.append({
            'href': anchor.get('href'),
            'day': words[2],
            'route': words[0] + ' ' + words[1]
        })

    for route_link in route_links:

        # A single web page listing stops
        timetable = WEBSITE + str(route_link['href'])
        stop_list_html = requests.get(timetable)
        stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

        route_title = stop_list_soup.find('h1').text.strip()
        mobile_library = route_title.split(' Mobile Library ')[0]

        # The last paragraph mentioning 2020 holds the comma-separated dates
        dates = []
        for para in stop_list_soup.find_all('p'):
            if '2020' in para.text:
                dates = para.text.strip().split(', ')

        # The third listed date is used as the start date; guard against
        # pages listing fewer than three dates instead of raising IndexError.
        if len(dates) > 2:
            date = datetime.strptime(dates[2] + ' 2020', '%d %B %Y')
            start = date.strftime('%Y-%m-%d')
        else:
            start = ''

        # For each stop get the stop details (first row is the table header)
        for stop in stop_list_soup.find_all('tr')[1:]:

            cells = stop.find_all('td')

            route = mobile_library + ' ' + route_link['route']
            community = cells[1].string.strip()
            stop_name = cells[2].string.strip()
            address = stop_name + ', ' + community
            postcode = ''
            longitude = ''
            latitude = ''
            # Drop the final character of the day word taken from the link
            day = route_link['day'][:-1]
            # Keep digits only, then slice as HHMMHHMM (arrival, departure)
            times = re.sub(r'\D', '', cells[3].text)
            arrival = times[:2] + ':' + times[2:4]
            departure = times[4:6] + ':' + times[6:]

            if address in locations:
                longitude = locations[address][1]
                latitude = locations[address][0]
            else:
                # Geocoding: get the lat/lng
                geo_json = requests.get(NOM_URL + address + '&viewbox=' +
                                        BOUNDS).json()

                # Fall back to the community name alone
                if len(geo_json) == 0:
                    geo_json = requests.get(NOM_URL + community + '&viewbox=' +
                                            BOUNDS).json()

                if len(geo_json) > 0:
                    x = round(float(geo_json[0]['lon']), 5)
                    y = round(float(geo_json[0]['lat']), 5)
                    if (bbox[0] <= x <= bbox[2] and bbox[1] <= y <= bbox[3]
                            and x not in coordinates):
                        # Don't add duplicates - we'll manually sort em out laters
                        coordinates.append(x)
                        longitude = x
                        latitude = y

                # Pause only after geocoding requests (presumably to respect
                # the geocoder's rate limit); stops resolved from the manual
                # table make no request, so there is nothing to throttle.
                time.sleep(6)

            mobiles.append([
                mobile_library, route, community, stop_name, address, postcode,
                longitude, latitude, day, 'Public', arrival, departure,
                'FREQ=WEEKLY;INTERVAL=4', start, '', '', timetable
            ])

    create_mobile_library_file('Suffolk', 'suffolk.csv', mobiles)
Exemplo n.º 19
0
def run():
    """Scrape the Wrexham mobile library stop list and write 'wrexham.csv'.

    One page (WEBSITE + DATA_SOURCE) lists every route as an accordion
    section: the <h4> heading gives the route name and day, the first <ul>
    gives the visit dates and the first <ol> lists the stops. Each stop line
    is searched for a postcode (POSTCODE_RE), geocoded through
    api.postcodes.io, and then parsed with DATA_RE for community, stop name
    and times. The rows are handed to create_mobile_library_file().
    """

    mobiles = []
    mobile_library = 'Mobile 1'

    frequency = 'FREQ=WEEKLY;INTERVAL=3'

    # Compile once; the patterns are the same for every stop
    postcode_matcher = re.compile(POSTCODE_RE)
    data_matcher = re.compile(DATA_RE)

    # A single web page listing stops
    url = WEBSITE + DATA_SOURCE
    stop_list_html = requests.get(url)
    stop_list_soup = BeautifulSoup(stop_list_html.text, 'lxml')

    # For each route get the stop details
    for route in stop_list_soup.find_all(
            'div', {"class": "field-group-accordion-wrapper"}):

        heading = route.find('h4').string
        route_name = heading.split('(')[0].strip()
        day = heading.split('(')[1].split(',')[1].replace(')', '').strip()

        stops = route.find_all('ol')[0]

        # Default start date, used when the first date item has no plain text
        start = 'Jan 3'
        date_list = route.find_all('ul')[0].find_all('li')
        if date_list[0].string:
            start = date_list[0].string.strip()
        if 'and' in start:
            # Keep only the first of several dates joined by 'and'
            start = start.split('and')[0]
        date_obj = datetime.strptime(start + ' 2019', '%b %d %Y')
        start = date_obj.strftime('%Y-%m-%d')

        for stop in stops.find_all('li'):

            stop_text = stop.string

            # first extract the postcode exactly as it appears in the text
            postcode_match = postcode_matcher.search(stop_text)
            raw_postcode = postcode_match.group(1) if postcode_match else ''

            # then apply the manual fill-ins and corrections
            postcode = raw_postcode
            if postcode == '' and 'Brymbo' in stop_text:
                postcode = 'LL11 5AG'
            if postcode == '' and 'Cynddelw School' in stop_text:
                postcode = 'LL20 7HH'
            if postcode == 'SY13 0GB':
                postcode = 'LL13 0GB'
            if postcode == 'LL11 5GS':
                postcode = 'LL11 5SY'

            # then do the geocoding from postcode lookup
            postcode_request = requests.get(
                'https://api.postcodes.io/postcodes/' + postcode)
            postcode_data = json.loads(postcode_request.text)
            latitude = postcode_data['result']['latitude']
            longitude = postcode_data['result']['longitude']

            # Take the postcode out of the main string. The text as matched
            # is removed rather than the corrected value: a corrected
            # postcode never occurs in the text, so it would otherwise be
            # left behind for DATA_RE to trip over.
            stop_str = stop_text.replace(raw_postcode,
                                         '').replace('.',
                                                     ':').replace('–', '-')

            data = data_matcher.search(stop_str)

            community = data.group(1).strip()
            stop_name = data.group(2).strip()
            # Hours below 8 are treated as afternoon and shifted by 12
            arrival_hours = data.group(3).strip()
            if int(arrival_hours) < 8:
                arrival_hours = int(arrival_hours) + 12
            arrival_mins = data.group(4).strip()
            if arrival_mins == '':
                arrival_mins = '00'
            departure_hours = data.group(5).strip()
            if int(departure_hours) < 8:
                departure_hours = int(departure_hours) + 12
            departure_mins = data.group(6).strip()
            if departure_mins == '':
                departure_mins = '00'
            arrival = str(arrival_hours) + ':' + arrival_mins
            departure = str(departure_hours) + ':' + departure_mins

            address = stop_name + ', ' + community

            mobiles.append([
                mobile_library, route_name, community, stop_name, address,
                postcode, longitude, latitude, day, 'Public', arrival,
                departure, frequency, start, '', '', url
            ])

    create_mobile_library_file('Wrexham', 'wrexham.csv', mobiles)
Exemplo n.º 20
0
def _essex_cached_page(url, pause=0):
    """Return the HTML for url, reusing the copy under ../raw/essex/ if any.

    The cache file name is the URL-safe base64 encoding of the URL. When the
    page has to be downloaded it is saved to the cache and, if ``pause`` is
    non-zero, the function sleeps that many seconds afterwards.
    """
    encoded_link = str(base64.urlsafe_b64encode(url.encode("utf-8")), 'utf-8')
    cache_file = '../raw/essex/' + encoded_link + '.txt'

    if path.exists(cache_file):
        with open(cache_file, 'r') as cached:
            return cached.read()

    page_text = requests.get(url).text
    # save the data out as web scraping seems to be getting blocked so may take a few goes
    with open(cache_file, "w") as cached:
        cached.write(page_text)
    if pause:
        time.sleep(pause)
    return page_text


def _essex_clock(text):
    """Turn one side of a 'X to Y' time range into 'HH:MM' text.

    'am', 'pm' and dots are removed; what is left is read as H, HH, HMM or
    HHMM according to its length (any other length yields 00:00 before the
    shift). Hours below 8 are treated as afternoon and shifted by 12.
    """
    digits = text.replace('am', '').replace('pm', '').strip().replace('.', '')

    hours = '00'
    mins = '00'
    if len(digits) == 1:
        hours = digits.rjust(2, '0')
    if len(digits) == 2:
        hours = digits
    if len(digits) == 3:
        hours = digits[0:1].rjust(2, '0')
        mins = digits[1:3]
    if len(digits) == 4:
        hours = digits[0:2]
        mins = digits[2:4]

    if int(hours) < 8:
        hours = int(hours) + 12

    return str(hours) + ':' + mins


def run():
    """Scrape the Essex mobile library stop pages and write 'essex.csv'.

    The route list page (WEBSITE + ROUTES_LIST) is scanned for links that
    contain 'mobile-library-route'. Each route page links to its stops from
    its first table, and every stop page supplies the details in its
    'pfont' divs. Route and stop pages are cached under ../raw/essex/.
    Coordinates come from an api.postcodes.io lookup on the stop's postcode.
    """
    mobiles = []

    route_list_html = requests.get(WEBSITE + ROUTES_LIST)
    route_list_soup = BeautifulSoup(route_list_html.text, 'html.parser')

    route_links = []
    for link in route_list_soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href return None, which cannot be searched
        if href and 'mobile-library-route' in href:
            route_links.append(href)

    for link in route_links:
        # Only route downloads are followed by a 10 second pause
        route_text = _essex_cached_page(link, pause=10)
        route_soup = BeautifulSoup(route_text, 'html.parser')

        stop_links = []
        for stop_link in route_soup.find_all('table')[0].find_all('a'):
            stop_links.append(WEBSITE + stop_link.get('href'))

        for stop in stop_links:
            stop_text = _essex_cached_page(stop)

            stop_soup = BeautifulSoup(stop_text, 'html.parser')
            values = stop_soup.find_all('div', {"class": "pfont"})
            stop_name = stop_soup.find_all(
                'div', {"class": "yellow-wrapper"})[0].find("h1").text

            # First 'pfont' div: community on its first line, postcode on
            # its last line
            address_lines = values[0].text.strip().splitlines()
            community = address_lines[0].strip()
            address = stop_name + ', ' + community
            postcode = address_lines[-1].strip()
            # Manual postcode corrections
            if postcode == 'CM133AS':
                postcode = 'CM132AS'
            if postcode == 'RM4 1ED':
                postcode = 'RM4 1LU'

            # The first character of the frequency text is used as the
            # interval in weeks
            frequency = 'FREQ=WEEKLY;INTERVAL=' + values[1].text.strip()[:1]
            day = values[2].text.strip()
            times = values[3].text.strip()
            route_mobile = values[4].text.strip()
            route = 'Week ' + route_mobile.split('week')[1].strip() + ' ' + day
            mobile_library = route_mobile.split('week')[0].strip()
            start = values[6].text.strip()
            start = datetime.strptime(start, '%d %B %Y')
            start = start.strftime('%Y-%m-%d')

            arrival = _essex_clock(times.split('to')[0])
            departure = _essex_clock(times.split('to')[1])

            url = 'https://api.postcodes.io/postcodes/' + postcode
            postcode_request = requests.get(url)
            postcode_data = json.loads(postcode_request.text)
            latitude = postcode_data['result']['latitude']
            longitude = postcode_data['result']['longitude']

            mobiles.append(
                [mobile_library, route, community, stop_name, address, postcode, longitude, latitude,
                 day, 'Public', arrival, departure, frequency, start, '', '', stop]
            )

    create_mobile_library_file('Essex', 'essex.csv', mobiles)
def run():
    """Convert the Midlothian KML stop export into 'midlothian.csv'.

    Each KML Folder is one route; its name supplies the weekday and the week
    (1 or 2) of the fortnightly cycle. Each Placemark in the folder is one
    stop. Folders whose name contains '&' serve both weeks, so their stops
    are written twice: once as 'Week 1' and once as 'Week 2'.
    """

    ns = {'kml': 'http://www.opengis.net/kml/2.2'}
    timetable = 'https://www.midlothian.gov.uk/info/427/libraries/446/mobile_library'
    mobile_library = 'Mobile'
    frequency = 'FREQ=WEEKLY;INTERVAL=2'

    # Start date for each weekday, keyed by week number of the cycle
    dates = {
        1: {
            "Monday": "2020-02-24",
            "Tuesday": "2020-02-25",
            "Wednesday": "2020-02-26",
            "Thursday": "2020-02-27",
            "Friday": "2020-02-28"
        },
        2: {
            "Monday": "2020-03-02",
            "Tuesday": "2020-03-03",
            "Wednesday": "2020-03-04",
            "Thursday": "2020-03-05",
            "Friday": "2020-03-06"
        }
    }

    root = ET.parse(DATA_SOURCE).getroot()
    mobiles = []

    for route in root.find('kml:Document', ns).findall('kml:Folder', ns):

        route_name = route.find('kml:name', ns).text.replace(' Stops', '')

        # Monday unless a later weekday is named; the last match wins
        day = 'Monday'
        for weekday in ('Tuesday', 'Wednesday', 'Thursday', 'Friday'):
            if weekday in route_name:
                day = weekday

        week = 1 if 'Week 1' in route_name else 2

        # An '&' in the name means the stop is also a week 2 stop
        duplicate = '&' in route_name
        if duplicate:
            route_name = route_name.replace('Week 1 & 2', 'Week 1')
        start = dates[week][day]

        for stop in route.findall('kml:Placemark', ns):

            # The placemark name is the default for all three text fields
            placemark_name = stop.find('kml:name', ns).text
            address = placemark_name
            stop_name = placemark_name
            community = placemark_name

            description = stop.find('kml:description', ns).text.strip()

            # Text before the first '<br' is the stop name, unless it
            # contains 'day'
            if '<br' in description:
                first_line = description.split('<br')[0].strip()
                if 'day' not in first_line:
                    stop_name = first_line
                    address = stop_name + ', ' + community

            times_result = re.search(r'(\d{1,2}:\d{2}).*?(\d{1,2}:\d{2})',
                                     description.replace('.', ':'))
            arrival, departure = times_result.group(1, 2)

            # KML coordinates text: longitude first, then latitude
            point = stop.find('kml:Point', ns)
            lon_lat = point.find('kml:coordinates', ns).text.strip().split(',')
            longitude = lon_lat[0]
            latitude = lon_lat[1]

            mobiles.append([
                mobile_library, route_name, community, stop_name, address, '',
                longitude, latitude, day, 'Public', arrival, departure,
                frequency, start, '', '', timetable
            ])

            if duplicate:
                week_two_route = route_name.replace('Week 1', 'Week 2')
                mobiles.append([
                    mobile_library, week_two_route, community, stop_name,
                    address, '', longitude, latitude, day, 'Public', arrival,
                    departure, frequency, dates[2][day], '', '', timetable
                ])

    create_mobile_library_file('Midlothian', 'midlothian.csv', mobiles)
Exemplo n.º 22
0
def _east_riding_field(venue, data_id):
    """Return the 'content' div inside the venue's div with this data-id."""
    return venue.find('div', {
        "data-id": data_id
    }).find('div', {
        "class": "content"
    })


def _east_riding_clock(raw):
    """Normalise a matched time such as '9.3', '10' or '930' to 'H:MM' text.

    Dots become colons. A value with no separator is read as whole hours
    when it is one or two digits long, otherwise its last two digits are
    taken as the minutes. Single-digit minutes get a trailing zero.
    """
    clock = raw.replace('.', ':')
    if ':' not in clock:
        if len(clock) < 3:
            clock = clock + ':00'
        else:
            # Dotless value such as '930': the regex allows it, so split it
            # here instead of failing on the missing ':' below
            clock = clock[:-2] + ':' + clock[-2:]
    if len(clock.split(':')[1]) < 2:
        clock = clock + '0'
    return clock


def run():
    """Read the saved East Riding venue listing and write the stops CSV.

    The HTML file is scanned for venue blocks whose venue type is
    'Library_All/Library_Mobile Library'. Each one supplies name, postcode,
    coordinates, a link to the timetable and an opening-times string. The
    ';'-separated entry containing '2019' provides the times, the start date
    and the repeat interval. Stops sharing a start date share a route
    number, assigned in order of first appearance.
    """

    mobiles = []
    mobile_library = 'Mobile'

    route_names = {}

    route_number = 0

    # Close the file once parsed rather than leaving the handle open
    with open(HTML) as html_file:
        stop_soup = BeautifulSoup(html_file, "html.parser")

    for venue in stop_soup.find_all('div',
                                    {"class": "er-filter-block-wrapper"}):

        venue_type = _east_riding_field(venue, "venue_type").get_text()
        if venue_type != 'Library_All/Library_Mobile Library':
            continue

        stop_name = _east_riding_field(venue, "name").find(
            'a').get_text().replace(' Mobile Library', '')
        community = stop_name
        postcode = _east_riding_field(venue, "postcode").get_text()
        address = stop_name + ', ' + postcode
        latitude = _east_riding_field(venue, "latitude").get_text()
        longitude = _east_riding_field(venue, "longitude").get_text()
        timetable = 'https://www.eastridinglibraries.co.uk' + \
            _east_riding_field(venue, "learn_more").find(
                'a').get('href').strip()

        openings = _east_riding_field(
            venue, "opening_times").get_text().split(';')

        # Use the last entry that mentions 2019
        opening_times = ''
        for entry in openings:
            if '2019' in entry:
                opening_times = entry

        times_result = re.search(
            r'(\d{1,2}\.{0,1}\d{1,2})-(\d{1,2}\.{0,1}\d{1,2})',
            opening_times)
        arrival = _east_riding_clock(times_result.group(1))
        departure = _east_riding_clock(times_result.group(2))

        # The date is written as 2019MMDD; the digit after it is the interval
        date_result = re.search(r'(2019\d{4})', opening_times)
        start = datetime.strptime(date_result.group(1), '%Y%m%d')
        day = start.strftime('%A')
        start = start.strftime('%Y-%m-%d')

        frequency_result = re.search(r'2019\d{4}(\d)', opening_times)
        frequency = 'FREQ=WEEKLY;INTERVAL=' + frequency_result.group(1)

        # Number routes by the order in which start dates are first seen
        if not route_names.get(start):
            route_number = route_number + 1
            route_names[start] = route_number

        route = route_names[start]

        mobiles.append([
            mobile_library, route, community, stop_name, address, postcode,
            longitude, latitude, day, 'Public', arrival, departure,
            frequency, start, '', '', timetable
        ])

    create_mobile_library_file('East Riding of Yorkshire',
                               'east_riding_of_yorkshire.csv', mobiles)
def run():
    """Scrape the Wiltshire mobile library stops and write 'wiltshire.csv'.

    Works in two stages. If DATA_OUTPUT_RAW does not exist yet, every stop
    page linked from the stop list is scraped and saved to that raw CSV.
    The raw CSV is then read back and each address is given coordinates,
    from the ``locations`` lookup when listed there and otherwise from a
    geocoding request to NOM_URL restricted to BOUNDS. The result is handed
    to create_mobile_library_file().
    """

    # If we don't already have it, create the raw file
    if not os.path.isfile(DATA_OUTPUT_RAW):

        # Scrape stop information. This is a single web page listing stops.
        # It is only fetched here, when the raw file has to be built.
        stop_list = 'https://services.wiltshire.gov.uk/MobileLibrary/Library/StopList'
        stop_list_html = requests.get(stop_list)
        stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

        mobiles = []
        # For each stop get the stop details
        for link in stop_list_soup.find_all('a'):
            # Detect whether the link is a link to a stop; anchors without
            # an href return None and are skipped
            href = link.get('href')
            if not href or '/MobileLibrary/Library/Stop/' not in href:
                continue

            # Get the webpage
            stop_url = 'https://services.wiltshire.gov.uk' + href
            stop_html = requests.get(stop_url)
            stop_soup = BeautifulSoup(stop_html.text, 'html.parser')

            # General stop information: heading is 'Community, Stop name'
            stop_name = stop_soup.find('h2').text.strip()
            community = stop_name.split(', ')[0].strip()
            stop_name = stop_name.split(
                ', ')[1].replace(' (fortnightly stop)', '').strip()
            address = stop_name + ', ' + community

            # There are some stops that are two weekly but they're part of
            # separate routes.  Keep them separate
            frequency = 4

            # Detailed information for the stop is found in the table.
            table = stop_soup.find('table').find('tbody')

            for stop in table.find_all('tr'):
                round_link = stop.find('a')
                round_name = round_link.text.replace(
                    '\r\n', '').replace(' (fortnightly stop)', '')
                mobile_library = round_name.split(
                    ', ')[0].replace(' mobile library', '')
                day_week = round_name.split(', ')[1]
                route = day_week.replace('week', 'Week')
                day = day_week.split(' week ')[0]
                visit_date = datetime.strptime(
                    stop.find('li').text, '%A %d %B, %Y')
                date_output = visit_date.strftime('%Y-%m-%d')

                cells = stop.find_all('td')
                start = cells[1].text
                end = cells[2].text
                timetable = 'http://services.wiltshire.gov.uk' + \
                    round_link.get('href')

                mobiles.append({
                    'mobile': mobile_library, 'route': route,
                    'stop': stop_name, 'community': community,
                    'address': address, 'date': date_output, 'day': day,
                    'frequency': frequency, 'start': start, 'end': end,
                    'timetable': timetable
                })
                time.sleep(1)

        with open(DATA_OUTPUT_RAW, 'w', encoding='utf8', newline='') as out_raw:
            mob_writer = csv.writer(
                out_raw, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            mob_writer.writerow(['Mobile', 'Route', 'Community', 'Stop', 'Address',
                                 'Date', 'Day', 'Frequency', 'Start', 'End', 'Timetable'])

            for sto in mobiles:
                mob_writer.writerow([sto['mobile'], sto['route'], sto['community'], sto['stop'], sto['address'],
                                     sto['date'], sto['day'], sto['frequency'], sto['start'], sto['end'], sto['timetable']])

    mobiles = []
    coordinates = []

    # Bounding box used to reject geocoder hits outside the area of interest;
    # compared as (bbox[0] <= lon <= bbox[2]) and (bbox[1] <= lat <= bbox[3]).
    bbox = [float(bound) for bound in BOUNDS.split(',')]

    with open(DATA_OUTPUT_RAW, 'r', encoding='utf8', newline='') as raw:
        mobreader = csv.reader(raw, delimiter=',', quotechar='"')
        next(mobreader, None)  # skip the headers
        # Raw columns:
        # Mobile,Route,Community,Stop,Address,Date,Day,Frequency,Start,End,Timetable
        for row in mobreader:

            longitude = ''
            latitude = ''

            if row[4] in locations:
                longitude = locations[row[4]][1]
                latitude = locations[row[4]][0]
            else:
                # Geocoding: get the lat/lng
                geo_json = requests.get(
                    NOM_URL + row[4] + '&viewbox=' + BOUNDS).json()

                # Fall back to the community name alone
                if len(geo_json) == 0:
                    geo_json = requests.get(
                        NOM_URL + row[2] + '&viewbox=' + BOUNDS).json()

                if len(geo_json) > 0:
                    x = round(float(geo_json[0]['lon']), 5)
                    y = round(float(geo_json[0]['lat']), 5)

                    if (bbox[0] <= x <= bbox[2] and bbox[1] <= y <= bbox[3]
                            and x not in coordinates):
                        # Don't add duplicates - we'll manually sort em out laters
                        coordinates.append(x)
                        longitude = x
                        latitude = y

            # Output columns: mobile, route, community, stop, address,
            # postcode (blank), longitude, latitude, day, type, arrival,
            # departure, frequency rule, start date, two blanks, timetable
            mobile = [row[0], row[1], row[2], row[3], row[4], '', longitude,
                      latitude, row[6], 'Public', row[8], row[9], 'FREQ=WEEKLY;INTERVAL=4', row[5], '', '', row[10]]
            mobiles.append(mobile)

    create_mobile_library_file('Wiltshire', 'wiltshire.csv', mobiles)
def run():
    """Scrape the Worcestershire mobile library pages and write the CSV.

    The A-Z page (WEBSITE + A_Z_PAGE) links to one listing page per letter;
    each listing links to one page per community. A community page gives
    the monthly schedule ('2nd Tuesday' style), a map marker location and
    the stop lines. Every stop line that mentions the community and holds
    two times becomes one output row, with a monthly repeat rule.
    """

    mobiles = []
    mobile_library = 'Mobile'

    # Compile once; the patterns are the same for every page and stop line
    schedule_matcher = re.compile(r'(\d)(st|nd|rd|th) (.*day)')
    stop_times_matcher = re.compile(
        r'(\d{1,2}:\d{2}).*?(\d{1,2}:\d{2})(.*)')

    # Get the A-Z links
    az_list_html = requests.get(WEBSITE + A_Z_PAGE)
    az_list_soup = BeautifulSoup(az_list_html.text, 'html.parser')

    for az_link in az_list_soup.find(
            'ul', {
                'class': 'item-list item-list__inline a-to-z'
            }).find_all('a'):

        # A single web page listing stops for the alphabet letter
        stop_list_html = requests.get(WEBSITE + az_link.get('href'))
        stop_list_soup = BeautifulSoup(stop_list_html.text, 'html.parser')

        # For each stop get the stop details; the links are read from the
        # third 'item-list' on the page
        for link in stop_list_soup.find_all(
                'ul', {'class': 'item-list'})[2].find_all('a'):

            community = link.text.replace(' Mobile Library Timetable', '')
            stop_url = link.get('href')

            stop_html = requests.get(WEBSITE + stop_url)
            stop_soup = BeautifulSoup(stop_html.text, 'html.parser')

            stop_schedule = stop_soup.find(
                'table', {
                    'class': 'data-table directory-record'
                }).find_all('td')[0].text
            schedule_search = schedule_matcher.search(stop_schedule)

            week = int(schedule_search.group(1))
            day = schedule_search.group(3)

            # First occurrence, from today, of the nth given weekday of a month
            start = rrule(freq=MONTHLY,
                          dtstart=date.today(),
                          bysetpos=week,
                          byweekday=day_codes[day][0],
                          count=1)[0]
            start = start.strftime('%Y-%m-%d')
            repeat_rule = 'FREQ=MONTHLY;BYDAY=' + str(week) + day_codes[day][1]

            route = day + ' Week ' + str(week)

            # The marker value is read as 'latitude,longitude'
            location = stop_soup.find(
                id='map_marker_location_10798').get('value')
            longitude = location.split(',')[1]
            latitude = location.split(',')[0]

            # Manual coordinate override
            if community == 'Longdon':
                longitude = '-2.239241'
                latitude = '52.023780'

            if community == 'Sytchampton':
                # Manual stop line for this community
                stop_rows = ['11:05 to 11:20 - Sytchampton, Brakeshill']
            else:
                editor = stop_soup.find('div', {'class': 'editor'})
                stop_rows = [
                    stop.text.replace('\r\n', ' ')
                    for stop in editor.find_all('p')
                ]
                # With few paragraphs, fall back to the raw text lines
                if len(stop_rows) < 5:
                    stop_rows = editor.text.splitlines()

            for stop in stop_rows:

                if community not in stop:
                    continue

                times_result = stop_times_matcher.search(
                    stop.replace('.', ':'))
                if times_result is None:
                    # The line mentions the community but holds no pair of
                    # times, so it is not a stop row
                    continue

                arrival = times_result.group(1)
                departure = times_result.group(2)
                # What follows the times, minus the community name and
                # separators, is the stop name
                stop_name = times_result.group(3).replace(
                    community,
                    '').replace(',', '').replace('-',
                                                 '').replace('–',
                                                             '').strip()
                if stop_name == '':
                    stop_name = community
                address = stop_name + ', ' + community

                mobiles.append([
                    mobile_library, route, community, stop_name, address,
                    '', longitude, latitude, day, 'Public', arrival,
                    departure, repeat_rule, start, '', '',
                    WEBSITE + stop_url
                ])

    create_mobile_library_file('Worcestershire', 'worcestershire.csv', mobiles)
Exemplo n.º 25
0
def _format_stop_time(digits):
    """Turn a 1-4 digit time string into ``HH:MM``.

    One or two digits are read as whole hours ('9' -> '09:00'); three or
    four digits are read as hours followed by two minute digits
    ('930' -> '09:30', '1045' -> '10:45'). Hours below 8 have 12 added, so
    '130' becomes '13:30'.
    """
    if len(digits) > 2:
        hours, minutes = digits[:-2], digits[-2:]
    else:
        hours, minutes = digits, '00'
    hours = hours.rjust(2, '0')

    # The source writes times on a 12-hour clock without am/pm; anything
    # earlier than 8 is treated as an afternoon time.
    if int(hours) < 8:
        hours = str(int(hours) + 12)

    return hours + ':' + minutes


def run():
    """Convert the West Dunbartonshire KML route data into a stops file.

    Each KML ``Folder`` is one route; its name is split on
    "morning"/"afternoon" into the weekday (used as route name and day) and
    the community. Each ``Placemark`` in the folder becomes one stop row,
    with arrival and departure times taken from the first two numbers that
    follow "morning"/"afternoon" in the placemark description. A stop whose
    description contains no times gets empty arrival/departure values rather
    than inheriting the previous stop's times.

    Raises:
        KeyError: if a folder's weekday is not one of the known start dates.
        IndexError: if a folder name or description has no
            "morning"/"afternoon" marker.
    """
    dates = {
        "Monday": "2019-04-08",
        "Tuesday": "2019-04-09",
        "Wednesday": "2019-04-10",
        "Thursday": "2019-04-11",
        "Friday": "2019-04-12",
        "Saturday": "2019-04-13"
    }

    # add more as needed
    namespaces = {'xmlns': 'http://www.opengis.net/kml/2.2'}
    root = ET.parse(DATA_SOURCE).getroot()

    organisation = 'West Dunbartonshire'
    mobile = 'Mobile'
    timetable = 'https://www.west-dunbarton.gov.uk/libraries/mobile-housebound-services/mobile-library-service/mobile-library-timetable/'
    mobiles = []

    # Compiled once: both patterns are reused for every folder and stop.
    period_splitter = re.compile('(?i)morning|afternoon')
    times_matcher = re.compile(r'\d{1,4}')

    document = root.find('xmlns:Document', namespaces)
    for folder in document.findall('xmlns:Folder', namespaces):

        folder_name = folder.find('xmlns:name', namespaces).text
        sections = period_splitter.split(folder_name)

        route_name = sections[0].strip()
        community = sections[1].replace('-', '').strip()

        start = dates[route_name]
        day = route_name

        for stop in folder.findall('xmlns:Placemark', namespaces):
            stop_name = stop.find('xmlns:name', namespaces).text
            address = stop_name + ', ' + community

            # The first two comma-separated coordinate values fill the
            # longitude and latitude columns of the row, in that order.
            coords = stop.find('xmlns:Point', namespaces).find(
                'xmlns:coordinates', namespaces).text.split(',')
            geox = coords[0].strip()
            geoy = coords[1].strip()

            description = stop.find('xmlns:description', namespaces).text
            description_sections = period_splitter.split(description)
            # Drop '.' and ':' so "9.30" and "9:30" both become "930".
            times = description_sections[1].strip().replace(
                '.', '').replace(':', '')
            times_matches = times_matcher.findall(times)

            # Reset per stop so a missing time can never leak in from the
            # previous stop (or be undefined on the first one).
            arrival = ''
            departure = ''
            if times_matches:
                arrival = _format_stop_time(times_matches[0])
            if len(times_matches) > 1:
                departure = _format_stop_time(times_matches[1])

            mobiles.append(
                [mobile, route_name, community, stop_name, address, '', geox, geoy,
                 day, 'Public', arrival, departure, 'FREQ=WEEKLY;INTERVAL=2', start, '', '', timetable]
            )

    create_mobile_library_file(
        organisation, 'west_dunbartonshire.csv', mobiles)
Exemplo n.º 26
0
def run():
    """Build the Angus mobile library stops file from the JSON data source.

    Reads the 'features' list from DATA_SOURCE and turns each feature's
    properties into one stop row. Locations written as "Community: Stop" are
    split into community and stop name. Grid coordinates are reprojected from
    EPSG:27700 to EPSG:4326 to obtain longitude and latitude. The rows are
    then passed to create_mobile_library_file.
    """
    timetable = 'https://www.angusalive.scot/media/1708/mobile20library20timetable20-20new20service.pdf'

    # Start date for each weekday, keyed by the week number of the
    # two-week cycle.
    dates = {
        1: {
            "Monday": "2019-03-04",
            "Tuesday": "2019-03-05",
            "Wednesday": "2019-03-06",
            "Thursday": "2019-03-07",
            "Friday": "2019-03-08",
        },
        2: {
            "Monday": "2019-03-11",
            "Tuesday": "2019-03-12",
            "Wednesday": "2019-03-13",
            "Thursday": "2019-03-14",
            "Friday": "2019-03-15",
        }
    }

    with open(DATA_SOURCE) as data_file:
        features = json.load(data_file)['features']

    mobiles = []
    for feature in features:
        props = feature['properties']

        easting = props['grid_x']
        northing = props['grid_y']
        mobile_library = props['vehicle'].title()

        # Days are stored as plurals ("Mondays"); drop the trailing 's'.
        day = props['day'].rstrip('s')

        location = props['location'].title()
        location_parts = location.split(': ')
        if len(location_parts) > 1:
            community = location_parts[0].title()
            stop_name = location_parts[1].title()
            address = stop_name + ', ' + community
        else:
            community = ''
            stop_name = location
            address = location.title()

        arrival = props['time_arrive'].replace(':00Z', '')
        departure = props['time_depart'].replace(':00Z', '')

        week = props['week']
        route = '{} Week {} {}'.format(mobile_library, str(week), day)
        start = dates[week][day]

        # Reproject the single grid point to longitude/latitude.
        grid_point = geopandas.GeoSeries([Point(easting, northing)])
        grid_point.crs = {'init': 'epsg:27700'}
        lon_lat = grid_point.to_crs({'init': 'epsg:4326'})
        longitude = str(lon_lat[0].x)
        latitude = str(lon_lat[0].y)

        mobiles.append([
            mobile_library, route, community, stop_name, address, '',
            longitude, latitude, day, 'Public', arrival, departure,
            'FREQ=WEEKLY;INTERVAL=2', start, '', '', timetable
        ])

    create_mobile_library_file('Angus', 'angus.csv', mobiles)