示例#1
0
    def get_tags(element):
        """Collect the ``<tag>`` children of a node or way as a list of dicts.

        Each returned dict carries four keys:

        * ``id``    -- the ``id`` attribute of ``element``
        * ``key``   -- the part of the tag's ``k`` attribute after the first
          colon (the whole ``k`` when it contains no colon)
        * ``type``  -- the part of ``k`` before the first colon when ``k``
          matches ``LOWER_COLON``, otherwise ``default_tag_type``
        * ``value`` -- the tag's ``v`` attribute, passed through
          ``update_street_name`` for ``addr:street`` tags

        Tags whose ``k`` matches ``PROBLEMCHARS`` are skipped.
        """
        tags = []
        id_num = element.attrib['id']
        for child in element.iter('tag'):
            attr = child.attrib
            raw_key = attr['k']

            # check for problematic characters first and skip if matches
            if PROBLEMCHARS.search(raw_key):
                continue

            child_dict = {}
            child_dict['id'] = id_num
            child_dict['value'] = attr['v']

            # Split on the first colon only; with no colon the whole string
            # is returned, so [-1] is safe in both cases.
            child_dict['key'] = raw_key.split(':', 1)[-1]

            # Check if the k tag has : in it and treat according to specs
            if LOWER_COLON.search(raw_key):
                child_dict['type'] = raw_key.split(':')[0]
            else:
                child_dict['type'] = default_tag_type

            # street name check (not all : matches are addr:).
            # This must be the boolean `and`: the bitwise `&` binds tighter
            # than `==` and would try to evaluate `'addr' & <str>`.
            if child_dict['type'] == 'addr' and child_dict['key'] == 'street':
                child_dict['value'] = update_street_name(child_dict['value'])

            tags.append(child_dict)

        return tags
示例#2
0
def load_new_tag(element, secondary, default_tag_type):
    """Build one tag row for the ``way_tags`` / ``node_tags`` lists.

    ``element`` supplies the row's ``id``; ``secondary`` is the child whose
    ``k`` / ``v`` attributes are shaped.  A ``k`` without a colon is used
    unchanged as the key with ``default_tag_type`` as its type; otherwise the
    text before the first colon becomes the type and everything after it the
    key.  The value is run through the first cleaning function whose
    predicate accepts ``secondary``, or copied as-is when none does.
    """
    row = {'id': element.attrib['id']}

    raw_key = secondary.attrib['k']
    prefix, colon, remainder = raw_key.partition(":")
    if colon:
        row['key'] = remainder
        row['type'] = prefix
    else:
        row['key'] = raw_key
        row['type'] = default_tag_type

    # Predicate/cleaner pairs, tried in order; the first match wins.
    cleaners = (
        (is_street_name, update_street_name),
        (is_city_name, update_city_name),
        (is_phone_number, update_phone_num),
        (is_postcode_number, update_postcode),
    )
    for applies, clean in cleaners:
        if applies(secondary):
            row['value'] = clean(secondary.attrib['v'])
            break
    else:
        # No cleaner claimed this tag: keep the raw value.
        row['value'] = secondary.attrib['v']

    return row
def shape_element(element):
    """
    Shape an OpenStreetMap ``node`` or ``way`` element into a nested dict
    (the original author intended the result for JSON / MongoDB import).

    Returns None for any other element.  Otherwise the dict holds:

    * ``type``      -- the element's tag name
    * ``created``   -- attributes whose names are in ``CREATED``
    * ``pos``       -- ``[lat, lon]`` as floats (None for a missing half)
    * every other attribute under its own name
    * ``address``   -- values of tags whose key matches ``address_1``, keyed
      by the key with its first five characters removed
    * ``phone``     -- the ``phone`` tag after ``audit.update_phone``
    * ``node_refs`` -- the ``ref`` of every ``nd`` descendant, in order
    * every remaining tag under its own key

    Tags whose key matches ``problemchars`` or ``address_2`` are dropped.
    """
    if element.tag not in ("node", "way"):
        return None

    shaped = {"type": element.tag}

    # Top-level attributes
    for name, raw in element.attrib.items():
        if name in CREATED:
            shaped.setdefault("created", {})[name] = raw
        elif name == "lat":
            shaped.setdefault("pos", [None, None])[0] = float(raw)
        elif name == "lon":
            shaped.setdefault("pos", [None, None])[1] = float(raw)
        else:
            shaped[name] = raw

    # Descendants (iter() also yields the element itself, which is ignored
    # because its tag is neither "tag" nor "nd")
    for child in element.iter():
        if child.tag == "nd":
            shaped.setdefault("node_refs", []).append(child.attrib["ref"])
            continue
        if child.tag != "tag":
            continue

        key = child.attrib["k"]
        if re.search(problemchars, key):  # Skip problematic chars
            continue

        if re.search(address_1, key):
            address = shaped.setdefault("address", {})
            field = key[5:]  # drop the first five characters of the key
            if field == "street":  # Update street
                address["street"] = audit.update_street_name(
                    child.attrib["v"], mapping)
            else:
                address[field] = child.attrib["v"]
        elif re.search(address_2, key):
            continue
        elif key == "phone":  # Convert telephone format
            shaped["phone"] = audit.update_phone(child.attrib["v"])
        else:
            shaped[key] = child.attrib["v"]

    return shaped
def shape_element(element):
    """Turn a ``node`` or ``way`` element into a nested dict; None otherwise.

    Attributes named in ``CREATED`` are grouped under ``created``; ``lat`` and
    ``lon`` are converted to floats and stored as ``pos == [lat, lon]``; all
    other attributes are copied under their own names.  For descendants:
    ``nd`` refs are appended to ``node_refs``; tags matching ``problemchars``
    or ``address_2`` are skipped; tags matching ``address_1`` go into the
    ``address`` sub-dict (street names via ``audit.update_street_name``);
    ``phone`` is normalised with ``audit.update_phone``; anything else is
    stored under its own key.
    """
    if element.tag != "node" and element.tag != "way":
        return None

    # Slot of each coordinate inside the two-item "pos" list.
    pos_slot = {"lat": 0, "lon": 1}

    record = {"type": element.tag}
    for attr_name, attr_value in element.attrib.items():
        if attr_name in CREATED:
            record.setdefault("created", {})[attr_name] = attr_value
        elif attr_name in pos_slot:
            coords = record.setdefault("pos", [None, None])
            coords[pos_slot[attr_name]] = float(attr_value)
        else:
            record[attr_name] = attr_value

    for descendant in element.iter():
        kind = descendant.tag
        if kind == "tag":
            tag_key = descendant.attrib["k"]
            if re.search(problemchars, tag_key):
                # Keys with problematic characters are dropped.
                continue
            if re.search(address_1, tag_key):
                address = record.setdefault("address", {})
                sub_key = tag_key[5:]  # key without its first five characters
                if sub_key == "street":
                    address["street"] = audit.update_street_name(
                        descendant.attrib["v"])
                else:
                    address[sub_key] = descendant.attrib["v"]
            elif re.search(address_2, tag_key):
                continue
            elif tag_key == "phone":
                record["phone"] = audit.update_phone(descendant.attrib["v"])
            else:
                record[tag_key] = descendant.attrib["v"]
        elif kind == "nd":
            record.setdefault("node_refs", []).append(descendant.attrib["ref"])

    return record
示例#5
0
def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Only the ``node`` branch is present in this definition.  It copies the
    element attributes listed in ``node_attr_fields`` into ``node_attribs``
    and appends one dict per child (``id``, ``type``, ``key``, ``value``) to
    ``tags``.

    Args:
        element: XML element to shape.
        node_attr_fields: attribute names kept for a ``node`` element.
        way_attr_fields: accepted for interface compatibility; not used by
            the code present here.
        problem_chars: compiled pattern; a child whose ``k`` matches it (and
            did not already match ``LOWER_COLON``) is skipped.
        default_tag_type: ``type`` given to tags whose key has no
            ``type:key`` form.
    """

    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []  # Handle secondary tags the same way for both node and way elements

    if element.tag == 'node':
        for attrib in element.attrib:
            if attrib in node_attr_fields:
                node_attribs[attrib] = element.attrib[attrib]

        # For elements within the top element
        for child in element:
            node_tag = {}
            raw_key = child.attrib['k']

            # If the tag "k" value contains a ":", the characters before the
            # ":" are the tag type and the characters after it the tag key.
            if LOWER_COLON.match(raw_key):
                node_tag['id'] = element.attrib['id']
                node_tag['type'] = raw_key.split(':', 1)[0]
                node_tag['key'] = raw_key.split(':', 1)[1]

                # If key is street, update street name
                if node_tag['key'] == 'street':
                    node_tag['value'] = update_street_name(child.attrib['v'], mapping)
                # If key is postcode, update postal code
                elif node_tag['key'] == 'postcode':
                    node_tag['value'] = update_postal_code(child.attrib['v'])
                else:
                    node_tag['value'] = child.attrib['v']

                tags.append(node_tag)

            # If the tag "k" value contains problematic characters, the tag
            # is ignored.  (This explanation must be a comment: a string
            # literal between `if` and `elif` is a syntax error.)
            elif problem_chars.match(raw_key):
                continue

            else:
                node_tag['id'] = element.attrib['id']
                node_tag['type'] = default_tag_type
                node_tag['key'] = raw_key
                node_tag['value'] = child.attrib['v']
                tags.append(node_tag)

    # NOTE(review): the definition ends here with no `way` branch and no
    # return statement, so callers receive None.  Presumably the rest of the
    # function is missing from this file -- confirm against the full source
    # before relying on it.
示例#6
0
def _clean_tag_value(tag_key, raw_value):
    """Return ``raw_value`` cleaned by the audit function matching ``tag_key``."""
    if tag_key == "street":
        return audit.update_street_name(raw_value)
    if tag_key == "phone":
        return audit.update_phoneNum(raw_value)
    if tag_key == "postcode":
        return audit.update_zipcode(raw_value)
    # Not identified for cleaning: the value is returned as-is.
    return raw_value


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: attribute names kept for a ``node`` element.
        way_attr_fields: attribute names kept for a ``way`` element.
        problem_chars: compiled pattern; tags whose ``k`` matches are skipped.
        default_tag_type: ``type`` for tags whose key does not match
            ``LOWER_COLON``.

    Returns:
        ``{'node': attribs, 'node_tags': tags}`` for a node,
        ``{'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}`` for a
        way, and None for any other element.
    """
    if element.tag == 'node':
        allowed_fields = node_attr_fields
    elif element.tag == 'way':
        allowed_fields = way_attr_fields
    else:
        # Neither node nor way: nothing to shape.
        return None

    attribs = {
        name: value
        for name, value in element.attrib.items()
        if name in allowed_fields
    }

    # (key, value) pairs of 'tag' tags to skip
    erroneous_tags = {"phone": "fire"}

    way_nodes = []
    tags = []

    for child in element:
        # 'Ways_nodes' list
        if child.tag == "nd":
            way_nodes.append({
                'id': attribs['id'],
                'node_id': child.attrib['ref'],
                # Position counts the 'nd' children seen so far.
                'position': len(way_nodes),
            })

        # Create dictionary for each 'tag' tag
        elif child.tag == "tag":
            raw_key = child.attrib["k"]
            if problem_chars.match(raw_key):
                continue

            # Read k and v directly rather than looping over child.attrib:
            # the value cleaning needs the key, and attribute order is not
            # guaranteed.
            raw_value = child.attrib.get("v")
            if raw_key in erroneous_tags and raw_value == erroneous_tags[raw_key]:
                continue

            if LOWER_COLON.match(raw_key):
                colon_at = raw_key.find(':')
                tag_key = raw_key[colon_at + 1:]
                tag_type = raw_key[:colon_at]
            else:
                tag_key = raw_key
                tag_type = default_tag_type

            child_dict = {'id': attribs['id'], 'key': tag_key, 'type': tag_type}
            if raw_value is not None:
                child_dict['value'] = _clean_tag_value(tag_key, raw_value)
            tags.append(child_dict)

    # Return data structure
    if element.tag == 'node':
        return {'node': attribs, 'node_tags': tags}
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def _shape_secondary_tag(element_id, child, default_tag_type):
    """Build the tag dict (id, key, value, type) for one ``<tag>`` child."""
    raw_key = child.attrib['k']
    if LOWER_COLON.match(raw_key):
        parts = raw_key.split(':', 1)
        tag_type, tag_key = parts[0], parts[1]
    else:
        tag_type, tag_key = default_tag_type, raw_key

    raw_value = child.attrib['v']
    if tag_key == "street":
        value = a.update_street_name(raw_value)
    elif tag_key == "postcode":
        value = a.update_postcodes(raw_value)
    else:
        value = raw_value

    return {'id': element_id, 'key': tag_key, 'value': value, 'type': tag_type}


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: accepted for interface compatibility; every
            attribute of the element is copied regardless.
        way_attr_fields: accepted for interface compatibility; every
            attribute of the element is copied regardless.
        problem_chars: compiled pattern; tags whose ``k`` matches are skipped.
        default_tag_type: ``type`` for tags whose key does not match
            ``LOWER_COLON``.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        None for any other element.
    """
    tags = []  # Handle secondary tags the same way for both node and way elements

    # Iterate through node tags
    if element.tag == 'node':
        node_attribs = dict(element.attrib)

        for child in element.iter("tag"):
            if problem_chars.match(child.attrib['k']):
                continue
            tags.append(
                _shape_secondary_tag(element.attrib['id'], child,
                                     default_tag_type))

        return {'node': node_attribs, 'node_tags': tags}

    # Iterate through way tags
    if element.tag == 'way':
        way_attribs = dict(element.attrib)
        way_nodes = []

        # The counter lives outside the loop so each 'nd' child gets the next
        # position instead of always 0.
        position = 0

        for child in element:
            if child.tag == 'tag':
                if problem_chars.match(child.attrib['k']):
                    continue
                tags.append(
                    _shape_secondary_tag(element.attrib['id'], child,
                                         default_tag_type))

            elif child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': position,
                })
                position += 1

        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
示例#8
0
    			way_attribs[attrib] = element.attrib[attrib]

    	position = 0

    	for child in element:
    		way_tag = {}
    		way_node = {}

    		if child.tag == 'tag':
    			if LOWER_COLON.match(child.attrib['k']):
    				way_tag['id'] = element.attrib['id']
    				way_tag['type'] = child.attrib['k'].split(':', 1)[0]
    				way_tag['key'] = child.attrib['k'].split(':', 1)[1]

        			if way_tag['key'] == 'street':
        				way_tag['value'] = update_street_name(child.attrib['v'], mapping)

        			elif way_tag['key'] == 'postcode':
        				way_tag['value'] = update_postal_code(child.attrib['v'])    				
    				
    				else:
    					way_tag['value'] = child.attrib['v']

    				tags.append(way_tag)

    			elif PROBLEMCHARS.match(child.attrib['k']):
    				continue

    			else:
    				way_tag['id'] = element.attrib['id']
    				way_tag['type'] = 'regular'
示例#9
0
def _build_tag_row(element_id, child, problem_chars, default_tag_type):
    """Return a new tag dict for one ``<tag>`` child, or None if it is skipped.

    A tag is skipped when its ``k`` or its ``v`` matches ``problem_chars``.
    """
    raw_key = child.attrib['k']
    if problem_chars.search(raw_key) is not None:
        return None
    raw_value = child.attrib['v']
    # NOTE(review): values are filtered with the same pattern as keys; if the
    # pattern rejects characters that are common in values, those tags are
    # dropped here -- confirm this is intended.
    if problem_chars.search(raw_value) is not None:
        return None

    # Text before the first colon is the type, the rest is the key.
    tag_type, colon, tag_key = raw_key.partition(":")
    if not colon:
        tag_type, tag_key = default_tag_type, raw_key

    if raw_key == "addr:street":
        value = update_street_name(raw_value)
    elif raw_key == "phone":
        value = update_phone_num(raw_value)
    else:
        # Other values (including postcodes) are stored unchanged.
        value = raw_value

    return {'id': element_id, 'key': tag_key, 'type': tag_type, 'value': value}


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: attribute names copied from a ``node`` element;
            each must be present on the element.
        way_attr_fields: attribute names copied from a ``way`` element; each
            must be present on the element.
        problem_chars: compiled pattern used to reject tags.
        default_tag_type: ``type`` for tags whose key contains no colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        None for any other element.  ``id``, ``uid``, ``changeset`` and way
        node ids are ints; ``lat`` / ``lon`` are floats.

    Raises:
        KeyError: if a listed attribute is missing from the element.
    """
    if element.tag == 'node':
        attribs = {field: element.attrib[field] for field in node_attr_fields}
    elif element.tag == 'way':
        attribs = {field: element.attrib[field] for field in way_attr_fields}
    else:
        return None

    for field in ('id', 'uid', 'changeset'):
        attribs[field] = int(attribs[field])
    if element.tag == 'node':
        attribs['lon'] = float(attribs['lon'])
        attribs['lat'] = float(attribs['lat'])

    # Secondary tags are handled the same way for both node and way elements.
    # Each tag gets its own dict: appending one shared dict would leave every
    # list entry pointing at the same (last) tag.
    tags = []
    for child in element.iter('tag'):
        row = _build_tag_row(attribs['id'], child, problem_chars,
                             default_tag_type)
        if row is not None:
            tags.append(row)

    if element.tag == 'node':
        return {'node': attribs, 'node_tags': tags}

    way_nodes = [
        {'id': attribs['id'], 'node_id': int(nd.attrib['ref']), 'position': index}
        for index, nd in enumerate(element.iter('nd'))
    ]
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
示例#10
0
def _shape_child_tag(element_id, child, tag_attr_fields, default_tag_type):
    """Build the tag dict for one ``<tag>`` child of the element ``element_id``."""
    tag_attribs = dict.fromkeys(tag_attr_fields)
    tag_attribs['id'] = element_id  # the id comes from the parent element

    # Split on the first colon (if it exists): the text before it is the tag
    # type, the text after it is the tag key.
    full_key = child.attrib['k']
    prefix, colon, remainder = full_key.partition(':')
    if colon:
        tag_attribs['type'] = prefix
        tag_attribs['key'] = remainder
    else:
        tag_attribs['key'] = full_key
        tag_attribs['type'] = default_tag_type

    # Update the problematic values, using the audit functions.
    if is_street_name(child):
        tag_attribs['value'] = update_street_name(child.attrib['v'])
    elif is_postcode(child):
        tag_attribs['value'] = update_postcode(child.attrib['v'])
    else:
        tag_attribs['value'] = child.attrib['v']
    return tag_attribs


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  tag_attr_fields=TAGS_FIELDS,
                  way_node_attr_fields=WAY_NODES_FIELDS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: attribute names reported for a ``node``.
        way_attr_fields: attribute names reported for a ``way``.
        problem_chars: compiled pattern; tags whose ``k`` matches are skipped.
        tag_attr_fields: keys every tag dict starts with (initially None).
        way_node_attr_fields: keys every way-node dict starts with.
        default_tag_type: ``type`` for tags whose key contains no colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        None for any other element.  A listed attribute that is absent from
        the element is reported as the string ``"None"``.
    """
    if element.tag == 'node':
        attr_fields = node_attr_fields
    elif element.tag == 'way':
        attr_fields = way_attr_fields
    else:
        return None

    attribs = {
        attribute: element.attrib.get(attribute, "None")
        for attribute in attr_fields
    }

    tags = []  # Handle secondary tags the same way for both node and way elements
    way_nodes = []

    # Iterate the element itself rather than the private `_children` list.
    for child in element:
        if child.tag == 'tag':
            # A problematic key skips only this tag, not the ones after it.
            if problem_chars.search(child.attrib['k']):
                continue
            tags.append(
                _shape_child_tag(element.attrib['id'], child, tag_attr_fields,
                                 default_tag_type))

        elif child.tag == 'nd' and element.tag == 'way':
            way_node_attribs = dict.fromkeys(way_node_attr_fields)
            way_node_attribs['id'] = element.attrib['id']
            way_node_attribs['node_id'] = child.attrib['ref']
            # Position counts 'nd' children only, so interleaved 'tag'
            # children cannot leave gaps.
            way_node_attribs['position'] = len(way_nodes)
            way_nodes.append(way_node_attribs)

    if element.tag == 'node':
        return {'node': attribs, 'node_tags': tags}
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
示例#11
0
def shape_element(element):
    """
    Convert an XML element into a multi-level dictionary
    :param element: XML element to parse
    :return: Multi-level dictionary suitable for JSON output, or None when
        the element is neither a ``node`` nor a ``way``

    The dictionary holds the element's tag name under ``type``, its
    attributes (those named in ``CREATED`` grouped under ``created``,
    ``lon`` / ``lat`` combined into ``pos == [lon, lat]``), its direct
    ``tag`` children, and for ways the ``ref`` of every direct ``nd`` child
    under ``node_refs``.
    """
    if element.tag not in ("node", "way"):
        return None

    # add the tag type to the node dictionary
    node = {'type': element.tag}

    # Dump all the attributes from the element into the dictionary
    node.update(element.attrib)

    # Pop CREATED items into the 'created' sub-dictionary.  Attributes the
    # element does not carry are left out instead of raising KeyError.
    node['created'] = {k: node.pop(k) for k in CREATED if k in node}

    # Capture the latitude and longitude if present
    if 'lat' in node and 'lon' in node:
        node['pos'] = [float(node.pop('lon')), float(node.pop('lat'))]

    # Get the second level tags
    for tag in element.findall('tag'):
        # grab the k and v attributes
        k, v = tag.attrib['k'], tag.attrib['v']

        # Rename keys already in node to ensure they aren't overwritten
        if k in node:
            k = 'tag_' + k

        # skip tag if key contains problem characters
        if problemchars.search(k):
            continue

        # Break addresses down into a sub-dictionary
        if "addr:" in k:
            address = node.setdefault('address', {})

            # Keep the part after the first colon as the sub-dictionary key
            _, k = k.split(':', 1)

            # Replace the remaining colons with underscores
            k = k.replace(':', '_')

            # Special handling for street addresses, using the audit functions
            if k == "street":
                if audit.over_abbr_re.search(v):
                    v = audit.update_short_name(v)
                else:
                    v = audit.update_street_name(v)
            address[k] = v
            continue

        # deal with remaining tags: replace any colons with underscores
        k = k.replace(':', '_')

        # Update format of cuisine and phone tag types
        if k == "cuisine":
            # Build a real list: on Python 3 map() returns a lazy iterator,
            # which cannot be serialised to JSON.
            v = [audit.update_cuisine(item) for item in v.split(';')]
        elif k == "phone":
            v = audit.update_number(v)
            if not v:  # When phone number was invalid
                continue

        # Save the tag
        node[k] = v

    # store nd ref tags for ways
    if element.tag == "way":
        node['node_refs'] = [nd.attrib['ref'] for nd in element.findall('nd')]

    return node