def load_courts_from_file(json_path):
    """Read a JSON court directory and return it as a list of MACourt objects.

    ``json_path`` is resolved through ``path_and_mimetype``; the file it
    points to must contain a JSON array whose entries carry ``name``,
    ``phone``, ``fax``, ``lat``, ``lng`` and a nested ``address`` mapping
    with ``address``, ``city``, ``state``, ``zip`` and ``county``.

    Returns:
        list: one ``MACourt`` per JSON entry, in file order.

    Raises:
        KeyError: if an entry is missing one of the keys listed above.
    """
    # Only the resolved path is used; the mimetype is ignored.
    path, _mimetype = path_and_mimetype(json_path)
    with open(path) as handle:
        records = json.load(handle)

    new_courts = []
    for record in records:
        # Translate the nested JSON address into a DA Address object.
        location = Address()
        for field in ('address', 'city', 'state', 'zip', 'county'):
            setattr(location, field, record['address'][field])

        court = MACourt()
        for field in ('name', 'phone', 'fax'):
            setattr(court, field, record[field])
        court.address = location
        court.lat = record['lat']
        court.lng = record['lng']
        new_courts.append(court)
    return new_courts
def get_courts_from_massgov_url(url):
    """Load specified court directory page on Mass.gov and return a list of dictionaries.

    Each dictionary has the keys ``name``, ``phone``, ``fax``, ``address``
    (a nested dict with ``address``, ``city``, ``state``, ``zip``,
    ``county``), ``description`` (usually includes cities or county served),
    ``lat`` and ``lng``.

    Args:
        url: Address of the Mass.gov court locations page to scrape.

    Returns:
        list: one dictionary per map marker found on the page.

    Raises:
        IndexError: if the page has no ``drupal-settings-json`` element.
        KeyError: if the embedded JSON does not have the expected layout.
    """
    page = requests.get(url)
    soup = bs4.BeautifulSoup(page.text, 'html.parser')
    # This is the element that has the JSON data as of 6/19/2018.
    jstring = soup.find_all(
        attrs={"data-drupal-selector": "drupal-settings-json"}
    )[0].text
    jdata = json.loads(jstring)
    markers = jdata['locations']['googleMap']['markers']

    courts = []
    for marker in markers:
        info = marker['infoWindow']
        name = info['name']

        # Reset per marker *before* searching: if the promo list is empty the
        # description must be blank rather than unbound or left over from the
        # previous marker.
        description = ''
        for item in jdata['locations']['imagePromos']['items']:
            if item['title']['text'] == name:
                description = item['description']['richText']['rteElements'][
                    0]['data']['rawHtml']['content']['#context']['value']
                break

        address = Address()
        address.address = info['address']
        # NOTE(review): geolocate()/normalize() are presumably what populate
        # city/state/zip/county, which are read immediately below - confirm
        # against the Address implementation.
        address.geolocate()
        address.normalize()

        courts.append({
            'name': name,
            'phone': info['phone'],
            'fax': info['fax'],
            'address': {
                'address': address.address,
                'city': address.city,
                'state': address.state,
                'zip': address.zip,
                'county': address.county
            },
            'description': description,
            'lat': marker['position']['lat'],
            'lng': marker['position']['lng']
        })
    return courts
def get_courts_from_massgov_url(url, shim_ehc_middlesex=True, shim_nhc_woburn=True):
    """Load specified court directory page on Mass.gov and return a list of court dictionaries.

    Each dictionary has the keys ``name``, ``description`` (usually includes
    cities or county served), ``has_po_box``, ``phone``, ``fax``, ``address``
    (nested dict: ``city``, ``address``, ``state``, ``zip``, ``county``,
    ``orig_address`` and optionally ``unit``) and ``location`` (nested dict:
    ``latitude``, ``longitude``).

    Args:
        url: Address of the Mass.gov court locations page to scrape.
        shim_ehc_middlesex: When true and ``url`` is the Housing Court
            locations page, append a hard-coded entry for the Eastern Housing
            Court - Middlesex Session.
        shim_nhc_woburn: When true and ``url`` is the Housing Court locations
            page, append a hard-coded entry for the Northeast Housing Court -
            Woburn Session.

    Returns:
        list: court dictionaries sorted by ``name``.

    Raises:
        IndexError: if the page has no ``drupal-settings-json`` element.
        KeyError: if the embedded JSON does not have the expected layout.
    """
    housing_court_url = 'https://www.mass.gov/orgs/housing-court/locations'
    searcher = SearchEngine(simple_zipcode=True)

    # See: https://usaddress.readthedocs.io/en/latest/ which explains how the
    # mapping below prevents a RepeatedLabelError. Basically parsing into
    # line 1, line 2, etc is good enough for our use case.
    # Built once here because it does not change between markers.
    tag_mapping = {
        'Recipient': 'recipient',
        'AddressNumber': 'address',
        'AddressNumberPrefix': 'address',
        'AddressNumberSuffix': 'address',
        'StreetName': 'address',
        'StreetNamePreDirectional': 'address',
        'StreetNamePreModifier': 'address',
        'StreetNamePreType': 'address',
        'StreetNamePostDirectional': 'address',
        'StreetNamePostModifier': 'address',
        'StreetNamePostType': 'address',
        'CornerOf': 'address',
        'IntersectionSeparator': 'address',
        'LandmarkName': 'address',
        'USPSBoxGroupID': 'address',
        'USPSBoxGroupType': 'address',
        'USPSBoxID': 'address',
        'USPSBoxType': 'address',
        'BuildingName': 'unit',
        'OccupancyType': 'unit',
        'OccupancyIdentifier': 'unit',
        'SubaddressIdentifier': 'unit',
        'SubaddressType': 'unit',
        'PlaceName': 'city',
        'StateName': 'state',
        'ZipCode': 'zip',
    }

    page = requests.get(url)
    soup = bs4.BeautifulSoup(page.text, 'html.parser')
    # This is the element that has the JSON data as of 6/19/2018.
    jstring = soup.find_all(
        attrs={"data-drupal-selector": "drupal-settings-json"}
    )[0].text
    jdata = json.loads(jstring)
    markers = jdata['locations']['googleMap']['markers']

    courts = []
    # The address and description are in a different part of the JSON.
    for marker in markers:
        info = marker['infoWindow']
        html_name = info['name']

        # Defaults for a marker with no matching promo item: use the marker's
        # own name and a blank description. Without these, an unmatched
        # marker would reuse the previous court's name (or raise NameError on
        # the first marker).
        name = html_name
        description = ''
        for item in jdata['locations']['imagePromos']['items']:
            if item['title']['text'] in html_name:
                name = item['title']['text'].rstrip()
                description = item['description']['richText']['rteElements'][
                    0]['data']['rawHtml']['content']['#context']['value']
                break

        address = Address()
        orig_address = info['address']
        # The geolocate method does _not_ work with PO Boxes (silently
        # discards). Strip both the "PO Box" and "P.O. Box" spellings in a
        # single pass so neither substitution overwrites the other.
        clean_address = re.sub(r' *(?:PO|P\.O\.) Box .*?,', "", orig_address)
        # We want to track if there was a PO Box where mail should be delivered.
        has_po_box = clean_address != orig_address
        address.address = orig_address

        try:
            address_parts = usaddress.tag(orig_address, tag_mapping=tag_mapping)
        except usaddress.RepeatedLabelError:
            # Discard the PO box entry if necessary - not a valid address.
            address_parts = usaddress.tag(clean_address, tag_mapping=tag_mapping)

        try:
            parsed, address_type = address_parts[0], address_parts[1]
            if address_type.lower() != 'street address':
                raise ValueError('We expected a Street Address.')
            address.address = parsed.get('address')
            if parsed.get('unit'):
                address.unit = parsed.get('unit')
            address.city = parsed.get('city')
            address.state = parsed.get('state')
            address.zip = parsed.get('zip')
            address.county = searcher.by_zipcode(address.zip).county
        except Exception:
            # Deliberate best effort: any parsing or zip-lookup failure falls
            # back to the one-line original address; fields that could not be
            # determined are blanked below.
            address.address = orig_address

        for field in ('address', 'city', 'state', 'zip', 'county'):
            if not hasattr(address, field):
                setattr(address, field, '')

        # Store the data in a serializable format. Maybe could refactor to use
        # object_hooks, but would need to go all the way down to DAObject?
        court = {
            'name': name,
            'description': description,
            'has_po_box': has_po_box,
            'phone': info['phone'],
            'fax': info['fax'],
            'address': {
                'city': address.city,
                'address': address.address,
                'state': address.state,
                'zip': address.zip,
                'county': address.county,
                # The one-line original address, which may include a PO Box.
                'orig_address': orig_address
            },
            'location': {
                'latitude': marker['position']['lat'],
                'longitude': marker['position']['lng']
            }
        }
        if hasattr(address, 'unit'):
            court['address']['unit'] = address.unit
        courts.append(court)

    if shim_ehc_middlesex and url == housing_court_url:
        court = {
            'name': "Eastern Housing Court - Middlesex Session",
            'description': "The Middlesex Session of the Eastern Housing Court serves Arlington, Belmont, and Cambridge, Medford and Somerville",
            'has_po_box': False,
            'phone': "(781) 306-2715",
            'fax': "",
            'address': {
                'city': "Medford",
                'address': "4040 Mystic Valley Parkway",
                'state': "MA",
                'zip': "02155",
                'county': "Middlesex County",
                'orig_address': "4040 Mystic Valley Parkway, Medford, MA 02155"
            },
            'location': {
                'latitude': 42.4048336,
                'longitude': -71.0893853
            }
        }
        courts.append(court)

    if shim_nhc_woburn and url == housing_court_url:
        court = {
            'name': "Northeast Housing Court - Woburn Session",
            'description': "The Woburn session of the Northeast Housing Court serves Bedford, Burlington, Concord, Everett,Lexington, Lincoln, Malden, Melrose, North Reading, Reading, Stoneham, Wakefield, Waltham, Watertown, Weston, Wilmington, Winchester, and Woburn.",
            'has_po_box': False,
            'phone': "(978) 689-7833",
            'fax': "",
            'address': {
                'city': "Woburn",
                'address': "200 Trade Center",
                'unit': "Courtroom 540 - 5th Floor",
                'state': "MA",
                'zip': "01801",
                'county': "Middlesex County",
                'orig_address': "200 Trade Center, Courtroom 540 - 5th Floor, Woburn, MA 01801"
            },
            'location': {
                'latitude': 42.500543,
                'longitude': -71.1656604
            }
        }
        courts.append(court)

    # We want to sort within category of court.
    courts.sort(key=lambda k: k['name'])
    return courts
def get_courts_from_massgov_url(url, shim_ehc_middlesex=True, shim_nhc_woburn=True):
    """Load specified court directory page on Mass.gov and return a list of court dictionaries.

    Each dictionary has the keys ``name``, ``description`` (usually includes
    cities or county served), ``has_po_box``, ``phone``, ``fax``, ``address``
    (nested dict: ``city``, ``address``, ``state``, ``zip``, ``county``,
    ``orig_address`` and optionally ``unit``) and ``location`` (nested dict:
    ``latitude``, ``longitude``).

    Args:
        url: Address of the Mass.gov court locations page to scrape.
        shim_ehc_middlesex: When true and ``url`` is the Housing Court
            locations page, append a hard-coded entry for the Eastern Housing
            Court - Middlesex Session.
        shim_nhc_woburn: When true and ``url`` is the Housing Court locations
            page, append a hard-coded entry for the Northeast Housing Court -
            Woburn Session.

    Returns:
        list: court dictionaries sorted by ``name``.

    Raises:
        IndexError: if the page has no ``drupal-settings-json`` element.
        KeyError: if the embedded JSON does not have the expected layout.
    """
    housing_court_url = 'https://www.mass.gov/orgs/housing-court/locations'

    page = requests.get(url)
    soup = bs4.BeautifulSoup(page.text, 'html.parser')
    # This is the element that has the JSON data as of 6/19/2018.
    jstring = soup.find_all(
        attrs={"data-drupal-selector":"drupal-settings-json"}
    )[0].text
    jdata = json.loads(jstring)
    markers = jdata['locations']['googleMap']['markers']

    courts = []
    # The address and description are in a different part of the JSON.
    for marker in markers:
        info = marker['infoWindow']
        html_name = info['name']

        # Defaults for a marker with no matching promo item: use the marker's
        # own name and a blank description. Without these, an unmatched
        # marker would reuse the previous court's name (or raise NameError on
        # the first marker).
        name = html_name
        description = ''
        for item in jdata['locations']['imagePromos']['items']:
            if item['title']['text'] in html_name:
                name = item['title']['text']
                description = item['description']['richText']['rteElements'][0]['data']['rawHtml']['content']['#context']['value']
                break

        address = Address()
        orig_address = info['address']
        # The geolocate method does _not_ work with PO Boxes (silently discards).
        clean_address = re.sub(r' *PO Box .*?,',"",orig_address)
        # We want to track if there was a PO Box where mail should be delivered.
        has_po_box = clean_address != orig_address
        address.address = orig_address

        if address.address == '':
            # Nothing to geolocate: blank every component explicitly.
            address.city = ''
            address.state = ''
            address.zip = ''
            address.county = ''
            address.unit = ''
        else:
            address.geolocate(clean_address)

        # Geolocation may leave some components unset; make sure every field
        # read below exists. 'unit' is intentionally left optional.
        for field in ('address', 'city', 'state', 'zip', 'county'):
            if not hasattr(address, field):
                setattr(address, field, '')

        # Store the data in a serializable format. Maybe could refactor to use
        # object_hooks, but would need to go all the way down to DAObject?
        court = {
            'name': name,
            'description': description,
            'has_po_box' : has_po_box,
            'phone': info['phone'],
            'fax': info['fax'],
            'address': {
                'city': address.city,
                'address': address.address,
                'state': address.state,
                'zip': address.zip,
                'county': address.county,
                # The one-line original address, which may include a PO Box.
                'orig_address': orig_address
            },
            'location': {
                'latitude': marker['position']['lat'],
                'longitude': marker['position']['lng']
            }
        }
        if hasattr(address, 'unit'):
            court['address']['unit'] = address.unit
        courts.append(court)

    if shim_ehc_middlesex and url == housing_court_url:
        court = {
            'name': "Eastern Housing Court - Middlesex Session",
            'description': "The Middlesex Session of the Eastern Housing Court serves Arlington, Belmont, and Cambridge, Medford and Somerville",
            'has_po_box' : False,
            'phone': "(781) 306-2715",
            'fax':"",
            'address': {
                'city': "Medford",
                'address': "4040 Mystic Valley Parkway",
                'state': "MA",
                'zip': "02155",
                'county': "Middlesex",
                'orig_address': "4040 Mystic Valley Parkway, Medford, MA 02155"
            },
            'location': {
                'latitude': 42.4048336,
                'longitude': -71.0893853
            }
        }
        courts.append(court)

    if shim_nhc_woburn and url == housing_court_url:
        court = {
            'name': "Northeast Housing Court - Woburn Session",
            'description': "The Woburn session of the Northeast Housing Court serves Bedford, Burlington, Concord, Everett,Lexington, Lincoln, Malden, Melrose, North Reading, Reading, Stoneham, Wakefield, Waltham, Watertown, Weston, Wilmington, Winchester, and Woburn.",
            'has_po_box' : False,
            'phone': "(978) 689-7833",
            'fax':"",
            'address': {
                'city': "Woburn",
                'address': "200 Trade Center",
                'unit': "Courtroom 540 - 5th Floor",
                'state': "MA",
                'zip': "01801",
                'county': "Middlesex",
                'orig_address': "200 Trade Center, Courtroom 540 - 5th Floor, Woburn, MA 01801"
            },
            'location': {
                'latitude': 42.500543,
                'longitude': -71.1656604
            }
        }
        courts.append(court)

    # We want to sort within category of court.
    courts.sort(key=lambda k: k['name'])
    return courts