def get_tags(element):
    """Get the tags under a node or way element in the proper format.

    Every ``tag`` element found by ``element.iter('tag')`` is turned into a
    dict with the keys ``id``, ``value``, ``key`` and ``type``. Tags whose
    ``k`` attribute matches ``PROBLEMCHARS`` are skipped.

    Args:
        element: XML element exposing ``attrib['id']`` and ``iter('tag')``.

    Returns:
        A list with one dict per retained tag, in document order.

    Note:
        ``default_tag_type`` is not a parameter of this function; it is read
        from the enclosing (module) scope.
    """
    tags = []
    id_num = element.attrib['id']
    for child in element.iter('tag'):
        raw_key = child.attrib['k']

        # Check for problematic characters first and skip the tag if found.
        if PROBLEMCHARS.search(raw_key):
            continue

        value = child.attrib['v']

        # Keep everything after the first colon, or the whole key if there
        # is none (stackoverflow.com/questions/6903557).
        key = raw_key.split(':', 1)[-1]

        # A "prefix:rest" key stores its prefix as the tag type.
        if LOWER_COLON.search(raw_key):
            tag_type = raw_key.split(':')[0]
        else:
            tag_type = default_tag_type

        # Street name check (not every colon key is "addr:"). This must be
        # the boolean ``and``: the bitwise ``&`` binds tighter than ``==``
        # and raised TypeError on ``'addr' & <str>``.
        if tag_type == 'addr' and key == 'street':
            value = update_street_name(value)

        tags.append({
            'id': id_num,
            'value': value,
            'key': key,
            'type': tag_type,
        })
    return tags
def load_new_tag(element, secondary, default_tag_type):
    """Build one tag dict for the ``way_tags`` / ``node_tags`` lists.

    Args:
        element: Parent element; its ``id`` attribute becomes the tag ``id``.
        secondary: The ``tag`` child element providing ``k`` and ``v``.
        default_tag_type: Type used when ``k`` contains no colon.

    Returns:
        A dict with the keys ``id``, ``key``, ``type`` and ``value``.
    """
    raw_key = secondary.attrib['k']
    record = {'id': element.attrib['id']}

    # Text before the first colon is the type; everything after is the key.
    prefix, colon, remainder = raw_key.partition(":")
    if colon:
        record['key'] = remainder
        record['type'] = prefix
    else:
        record['key'] = raw_key
        record['type'] = default_tag_type

    # Clean the value with the first cleaner whose predicate accepts the tag.
    if is_street_name(secondary):
        cleaned = update_street_name(secondary.attrib['v'])
    elif is_city_name(secondary):
        cleaned = update_city_name(secondary.attrib['v'])
    elif is_phone_number(secondary):
        cleaned = update_phone_num(secondary.attrib['v'])
    elif is_postcode_number(secondary):
        cleaned = update_postcode(secondary.attrib['v'])
    else:
        cleaned = secondary.attrib['v']

    record['value'] = cleaned
    return record
def shape_element(element):
    """Shape an OpenStreetMap element into a dict for JSON / MongoDB import.

    Args:
        element: XML element; only ``node`` and ``way`` tags are shaped.

    Returns:
        A dict describing the element, or ``None`` for any other tag.
    """
    if element.tag not in ("node", "way"):
        return None

    shaped = {"type": element.tag}

    # Element attributes: CREATED keys are grouped, lat/lon become "pos".
    for name, raw in element.attrib.items():
        if name in CREATED:
            shaped.setdefault("created", {})[name] = raw
        elif name == "lat":
            shaped.setdefault("pos", [None, None])[0] = float(raw)
        elif name == "lon":
            shaped.setdefault("pos", [None, None])[1] = float(raw)
        else:
            shaped[name] = raw

    # Sub-tree: "nd" references and "tag" key/value pairs.
    for child in element.iter():
        if child.tag == "nd":
            shaped.setdefault("node_refs", []).append(child.attrib["ref"])
            continue
        if child.tag != "tag":
            continue

        key = child.attrib["k"]
        if re.search(problemchars, key):
            # Skip keys with problematic characters.
            continue

        if re.search(address_1, key):
            # key[5:] drops the first five characters (the "addr:" prefix).
            address = shaped.setdefault("address", {})
            field = key[5:]
            if field == "street":
                address["street"] = audit.update_street_name(
                    child.attrib["v"], mapping)
            else:
                address[field] = child.attrib["v"]
        elif re.search(address_2, key):
            continue
        elif key == "phone":
            # Convert telephone format.
            shaped["phone"] = audit.update_phone(child.attrib["v"])
        else:
            shaped[key] = child.attrib["v"]

    return shaped
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    Args:
        element: XML element to convert.

    Returns:
        The shaped dict for ``node`` / ``way`` elements, otherwise ``None``.
    """
    if element.tag != "node" and element.tag != "way":
        return None

    result = {"type": element.tag}

    # Top-level attributes.
    for attr_name, attr_value in element.attrib.items():
        if attr_name in CREATED:
            result.setdefault("created", {})[attr_name] = attr_value
        elif attr_name == "lat":
            result.setdefault("pos", [None, None])[0] = float(attr_value)
        elif attr_name == "lon":
            result.setdefault("pos", [None, None])[1] = float(attr_value)
        else:
            result[attr_name] = attr_value

    # Elements in the sub-tree.
    for sub in element.iter():
        if sub.tag == "tag":
            key = sub.attrib["k"]
            if re.search(problemchars, key):
                # Skip keys with problematic characters.
                continue

            if re.search(address_1, key):
                # Address keys are stored under "address" without their
                # first five characters.
                address = result.setdefault("address", {})
                suffix = key[5:]
                if suffix == "street":
                    address["street"] = audit.update_street_name(
                        sub.attrib["v"])
                else:
                    address[suffix] = sub.attrib["v"]
            elif re.search(address_2, key):
                continue
            elif key == "phone":
                # Convert telephone format.
                result["phone"] = audit.update_phone(sub.attrib["v"])
            else:
                result[key] = sub.attrib["v"]
        elif sub.tag == "nd":
            result.setdefault("node_refs", []).append(sub.attrib["ref"])

    return result
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: Attribute names kept for ``node`` elements.
        way_attr_fields: Attribute names kept for ``way`` elements (not used
            by the node handling below).
        problem_chars: Compiled pattern; a tag whose ``k`` contains a match
            is ignored.
        default_tag_type: Tag type used when ``k`` has no ``type:key`` form.

    NOTE(review): only the ``node`` handling is present in this definition
    and no ``return`` statement is visible, so it returns ``None`` as
    written. Confirm where the remainder of the function lives.
    """
    node_attribs = {}
    way_attribs = {}  # unused by the node handling
    way_nodes = []  # unused by the node handling
    tags = []

    if element.tag == 'node':
        for attrib in element.attrib:
            if attrib in node_attr_fields:
                node_attribs[attrib] = element.attrib[attrib]

        # For elements within the top element.
        for child in element:
            raw_key = child.attrib['k']

            # If the "k" value contains problematic characters, the tag is
            # ignored. ``search`` scans the whole key; ``match`` would only
            # look at its first character(s). The check runs first so that a
            # "type:key" prefix cannot hide a bad character later on.
            if problem_chars.search(raw_key):
                continue

            node_tag = {'id': element.attrib['id']}
            if LOWER_COLON.match(raw_key):
                # Characters before the first ":" are the tag type, the
                # characters after it are the tag key.
                tag_type, key = raw_key.split(':', 1)
                node_tag['type'] = tag_type
                node_tag['key'] = key
                if key == 'street':
                    node_tag['value'] = update_street_name(
                        child.attrib['v'], mapping)
                elif key == 'postcode':
                    node_tag['value'] = update_postal_code(child.attrib['v'])
                else:
                    node_tag['value'] = child.attrib['v']
            else:
                node_tag['type'] = default_tag_type
                node_tag['key'] = raw_key
                node_tag['value'] = child.attrib['v']
            tags.append(node_tag)
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: Attribute names kept for ``node`` elements.
        way_attr_fields: Attribute names kept for ``way`` elements.
        problem_chars: Compiled pattern; tags whose ``k`` contains a match
            are skipped.
        default_tag_type: Tag type used when ``k`` has no ``type:key`` form.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        ``None`` for any other element (previously an UnboundLocalError).
    """
    if element.tag == 'node':
        attr_fields = node_attr_fields
    elif element.tag == 'way':
        attr_fields = way_attr_fields
    else:
        return None
    is_way = element.tag == 'way'

    attribs = {name: value for name, value in element.attrib.items()
               if name in attr_fields}

    # Key/value pairs that are known to be erroneous and are dropped.
    erroneous_tags = {"phone": "fire"}

    way_nodes = []
    tags = []
    for child in element:
        if child.tag == "nd":
            if is_way:
                way_nodes.append({
                    'id': attribs['id'],
                    'node_id': child.attrib['ref'],
                    # Running index among the "nd" children only.
                    'position': len(way_nodes),
                })
        elif child.tag == "tag":
            raw_key = child.attrib["k"]
            # ``search`` looks at the whole key; ``match`` only tested its
            # beginning.
            if problem_chars.search(raw_key):
                continue

            # Read both attributes directly so the result does not depend on
            # the iteration order of ``child.attrib``.
            raw_value = child.attrib["v"]
            if erroneous_tags.get(raw_key) == raw_value:
                continue

            if LOWER_COLON.match(raw_key):
                tag_type, _, key = raw_key.partition(':')
            else:
                tag_type, key = default_tag_type, raw_key

            # Clean the value for export to CSV.
            if key == "street":
                value = audit.update_street_name(raw_value)
            elif key == "phone":
                value = audit.update_phoneNum(raw_value)
            elif key == "postcode":
                value = audit.update_zipcode(raw_value)
            else:
                # Not identified for cleaning: keep the value as-is.
                value = raw_value

            tags.append({
                'id': attribs['id'],
                'key': key,
                'type': tag_type,
                'value': value,
            })

    if is_way:
        return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
    return {'node': attribs, 'node_tags': tags}
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: Accepted for interface compatibility; every
            attribute of the element is copied.
        way_attr_fields: Accepted for interface compatibility; every
            attribute of the element is copied.
        problem_chars: Compiled pattern; tags whose ``k`` contains a match
            are skipped.
        default_tag_type: Tag type used when ``k`` has no ``type:key`` form.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        ``None`` for any other element.
    """
    element_id = element.attrib.get('id')
    tags = []

    if element.tag == 'node':
        node_attribs = dict(element.attrib)
        for child in element.iter("tag"):
            shaped = _shape_secondary_tag(
                child, element_id, problem_chars, default_tag_type)
            if shaped is not None:
                tags.append(shaped)
        return {'node': node_attribs, 'node_tags': tags}

    if element.tag == 'way':
        way_attribs = dict(element.attrib)
        way_nodes = []
        # The counter lives outside the loop; resetting it per child gave
        # every way node position 0.
        position = 0
        for child in element:
            if child.tag == 'tag':
                shaped = _shape_secondary_tag(
                    child, element_id, problem_chars, default_tag_type)
                if shaped is not None:
                    tags.append(shaped)
            elif child.tag == 'nd':
                way_nodes.append({
                    'id': element_id,
                    'node_id': child.attrib['ref'],
                    'position': position,
                })
                position += 1
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None


def _shape_secondary_tag(child, element_id, problem_chars, default_tag_type):
    """Return the shaped dict for one ``tag`` child, or None if it is skipped."""
    raw_key = child.attrib['k']
    # ``search`` scans the whole key; ``match`` only tested its beginning.
    if problem_chars.search(raw_key):
        return None

    if LOWER_COLON.match(raw_key):
        tag_type, key = raw_key.split(':', 1)
    else:
        tag_type, key = default_tag_type, raw_key

    raw_value = child.attrib['v']
    if key == "street":
        value = a.update_street_name(raw_value)
    elif key == "postcode":
        value = a.update_postcodes(raw_value)
    else:
        value = raw_value

    return {'id': element_id, 'key': key, 'value': value, 'type': tag_type}
way_attribs[attrib] = element.attrib[attrib] position = 0 for child in element: way_tag = {} way_node = {} if child.tag == 'tag': if LOWER_COLON.match(child.attrib['k']): way_tag['id'] = element.attrib['id'] way_tag['type'] = child.attrib['k'].split(':', 1)[0] way_tag['key'] = child.attrib['k'].split(':', 1)[1] if way_tag['key'] == 'street': way_tag['value'] = update_street_name(child.attrib['v'], mapping) elif way_tag['key'] == 'postcode': way_tag['value'] = update_postal_code(child.attrib['v']) else: way_tag['value'] = child.attrib['v'] tags.append(way_tag) elif PROBLEMCHARS.match(child.attrib['k']): continue else: way_tag['id'] = element.attrib['id'] way_tag['type'] = 'regular'
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Secondary tags are handled the same way for node and way elements: a
    ``type:key`` style ``k`` is split on its first colon, ``addr:street``
    values go through ``update_street_name`` and ``phone`` values through
    ``update_phone_num``. Post codes are stored unchanged.

    Args:
        element: XML element to shape.
        node_attr_fields: Attribute names copied for ``node`` elements; each
            must be present on the element.
        way_attr_fields: Attribute names copied for ``way`` elements; each
            must be present on the element.
        problem_chars: Compiled pattern; a tag is dropped when its ``k`` or
            its ``v`` contains a match.
        default_tag_type: Tag type used when ``k`` contains no colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        ``None`` for any other element.

    Raises:
        KeyError: If a listed attribute field is missing from the element.
    """
    if element.tag == 'node':
        attr_fields = node_attr_fields
    elif element.tag == 'way':
        attr_fields = way_attr_fields
    else:
        return None
    is_node = element.tag == 'node'

    attribs = {field: element.attrib[field] for field in attr_fields}
    attribs['id'] = int(attribs['id'])
    attribs['uid'] = int(attribs['uid'])
    attribs['changeset'] = int(attribs['changeset'])
    if is_node:
        attribs['lon'] = float(attribs['lon'])
        attribs['lat'] = float(attribs['lat'])

    tags = []
    for child in element.iter('tag'):
        raw_key = child.attrib['k']
        raw_value = child.attrib['v']
        # NOTE(review): the value is screened with the same pattern as the
        # key, as before; confirm that dropping such values is intended.
        if (problem_chars.search(raw_key) is not None
                or problem_chars.search(raw_value) is not None):
            continue

        prefix, colon, remainder = raw_key.partition(":")
        if colon:
            key, tag_type = remainder, prefix
        else:
            key, tag_type = raw_key, default_tag_type

        if raw_key == "addr:street":
            value = update_street_name(raw_value)
        elif raw_key == "phone":
            value = update_phone_num(raw_value)
        else:
            value = raw_value

        # A fresh dict per tag: reusing one dict made every list entry
        # alias the values of the last tag processed.
        tags.append({
            'id': attribs['id'],
            'key': key,
            'type': tag_type,
            'value': value,
        })

    if is_node:
        return {'node': attribs, 'node_tags': tags}

    way_nodes = [
        {
            'id': attribs['id'],
            'node_id': int(nd.attrib['ref']),
            'position': position,
        }
        for position, nd in enumerate(element.iter('nd'))
    ]
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  tag_attr_fields=TAGS_FIELDS,
                  way_node_attr_fields=WAY_NODES_FIELDS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Secondary tags are handled the same way for node and way elements.

    Args:
        element: XML element to shape.
        node_attr_fields: Attribute names emitted for ``node`` elements.
        way_attr_fields: Attribute names emitted for ``way`` elements.
        problem_chars: Compiled pattern; tags whose ``k`` contains a match
            are skipped.
        tag_attr_fields: Keys pre-populated (with ``None``) in each tag dict.
        way_node_attr_fields: Keys pre-populated in each way-node dict.
        default_tag_type: Tag type used when ``k`` contains no colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        ``None`` for any other element. Attributes missing from the element
        are emitted as the string ``"None"``.
    """
    if element.tag == 'node':
        attr_fields = node_attr_fields
    elif element.tag == 'way':
        attr_fields = way_attr_fields
    else:
        return None
    is_way = element.tag == 'way'

    # Shape the element itself.
    attribs = {field: element.attrib.get(field, "None")
               for field in attr_fields}

    tags = []
    way_nodes = []
    # Iterate the element directly instead of the private ``_children``
    # attribute, which is not part of the ElementTree API.
    for child in element:
        if child.tag == 'tag':
            key = child.attrib['k']
            # Skip only this tag; a ``break`` here discarded every tag that
            # followed a problematic one.
            if problem_chars.search(key):
                continue

            tag_attribs = dict.fromkeys(tag_attr_fields)
            tag_attribs['id'] = element.attrib['id']

            # Split on the first colon (if any): the text before it is the
            # tag type and the text after it is the tag key.
            colon_pos = key.find(':')
            if colon_pos < 0:
                tag_attribs['key'] = key
                tag_attribs['type'] = default_tag_type
            else:
                tag_attribs['key'] = key[colon_pos + 1:]
                tag_attribs['type'] = key[:colon_pos]

            # Update the problematic values with the audit helpers.
            if is_street_name(child):
                tag_attribs['value'] = update_street_name(child.attrib['v'])
            elif is_postcode(child):
                tag_attribs['value'] = update_postcode(child.attrib['v'])
            else:
                tag_attribs['value'] = child.attrib['v']
            tags.append(tag_attribs)
        elif is_way and child.tag == 'nd':
            way_node_attribs = dict.fromkeys(way_node_attr_fields)
            way_node_attribs['id'] = element.attrib['id']
            way_node_attribs['node_id'] = child.attrib['ref']
            # Index among "nd" children only, so interleaved tags cannot
            # leave gaps in the sequence.
            way_node_attribs['position'] = len(way_nodes)
            way_nodes.append(way_node_attribs)

    if is_way:
        return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
    return {'node': attribs, 'node_tags': tags}
def shape_element(element):
    """Convert an XML element into a multi-level dictionary.

    :param element: XML element to parse
    :return: Multi-level dictionary suitable for JSON output, or ``None``
        when the element is neither a ``node`` nor a ``way``
    """
    if element.tag not in ("node", "way"):
        return None

    # Start with the tag type, then dump all the element attributes in.
    node = {'type': element.tag}
    node.update(element.attrib)

    # Move the CREATED items into the 'created' sub-dictionary. Attributes
    # that are absent from the element are left out instead of raising
    # KeyError.
    node['created'] = {k: node.pop(k) for k in CREATED if k in node}

    # Capture the position if present; stored as [lon, lat].
    if 'lat' in node and 'lon' in node:
        node['pos'] = [float(node.pop('lon')), float(node.pop('lat'))]

    # Second level tags.
    for tag in element.findall('tag'):
        k, v = tag.attrib['k'], tag.attrib['v']

        # Rename keys already in node to ensure they aren't overwritten.
        if k in node:
            k = 'tag_' + k

        # Skip the tag if its key contains problem characters.
        if problemchars.search(k):
            continue

        if "addr:" in k:
            # Break addresses down into a sub-dictionary.
            address = node.setdefault('address', {})

            # Use the part after the first colon as the sub-dictionary key,
            # replacing any remaining colons with underscores.
            _, k = k.split(':', 1)
            k = k.replace(':', '_')

            # Special handling for street names, using the audit functions.
            if k == "street":
                if audit.over_abbr_re.search(v):
                    v = audit.update_short_name(v)
                else:
                    v = audit.update_street_name(v)

            address[k] = v
        else:
            # Replace any colons with underscores.
            k = k.replace(':', '_')

            if k == "cuisine":
                # Build a real list (not a lazy ``map`` iterator) so the
                # value stays JSON-serialisable on Python 3 as well.
                v = [audit.update_cuisine(item) for item in v.split(';')]
            elif k == "phone":
                v = audit.update_number(v)
                if not v:
                    # The phone number was invalid.
                    continue

            node[k] = v

    # Store the nd ref tags for ways.
    if element.tag == "way":
        node['node_refs'] = [nd.attrib['ref'] for nd in element.findall('nd')]

    return node