def key_type(element):
    '''
    Classify the key (``k`` attribute) of a ``tag`` element.

    The key is tested against the module-level patterns in the order listed
    below, and the label of the first pattern that matches is returned:

    - lower: The key contains only lowercase letters and underscores.
    - lower_colon: The key contains only lowercase letters, underscores,
      and colons.
    - alpha_with_upper: The key contains only letters and underscores, and
      at least one letter is uppercase.
    - word_plus_colon: The key contains only word characters (letters,
      numbers, and underscores), plus colons, but does not fit into any of
      the above types.
    - problem_chars: The key contains at least one problematic character,
      such as a dot or a dollar sign.
    - other: Anything that does not fit into the above types.

    The patterns themselves are defined outside this function, so the
    descriptions above are only as accurate as those definitions.

    Args:
        element: An object exposing ``tag`` and ``attrib`` (presumably an
            ElementTree element -- confirm against the caller).

    Returns:
        One of the six labels above when ``element.tag == "tag"``;
        ``None`` for any other element.

    Raises:
        KeyError: If ``attrib`` is a dict without a ``'k'`` entry on a
            ``tag`` element.
    '''
    if element.tag != "tag":
        # Only <tag> elements carry a key to classify.
        return None

    # Look the key up once instead of once per branch.
    key = element.attrib['k']

    if lower.search(key):
        return 'lower'
    elif lower_colon.search(key):
        return 'lower_colon'
    elif alpha.search(key):
        return 'alpha_with_upper'
    elif word_plus_colon.search(key):
        return 'word_plus_colon'
    elif problemchars.search(key):
        return 'problem_chars'

    # Single-argument, parenthesised form: prints the same text under the
    # Python 2 print statement and is also valid as a Python 3 call.
    print('Unidentified type for key "{}"'.format(key))
    return 'other'
def key_type(element):
    '''
    Return a label describing the ``k`` attribute of a ``tag`` element.

    The key is run through the module-level patterns ``lower``,
    ``lower_colon``, ``alpha``, ``word_plus_colon`` and ``problemchars``,
    in that order; the label paired with the first pattern whose
    ``search`` succeeds is returned. If none of them matches, a notice is
    printed and ``'other'`` is returned.

    NOTE(review): a function with this same name is defined earlier in
    this file; this definition rebinds the name -- confirm the duplicate
    is intentional.

    Args:
        element: An object exposing ``tag`` and ``attrib``.

    Returns:
        ``'lower'``, ``'lower_colon'``, ``'alpha_with_upper'``,
        ``'word_plus_colon'``, ``'problem_chars'`` or ``'other'`` when
        ``element.tag == "tag"``; ``None`` for any other element.
    '''
    if element.tag != "tag":
        return None

    key = element.attrib['k']

    # Order matters: the first pattern that matches decides the label.
    ordered_checks = (
        (lower, 'lower'),
        (lower_colon, 'lower_colon'),
        (alpha, 'alpha_with_upper'),
        (word_plus_colon, 'word_plus_colon'),
        (problemchars, 'problem_chars'),
    )
    for pattern, label in ordered_checks:
        if pattern.search(key):
            return label

    # Parenthesised single-argument print behaves the same on Python 2
    # (print statement) and Python 3 (print function).
    print('Unidentified type for key "{}"'.format(key))
    return 'other'
def parse_tags(tags):
    '''
    Build a plain dictionary from an iterable of tag elements.

    Each element's ``k`` and ``v`` attributes are read. Keys that do not
    match the module-level ``lower_colon`` pattern are stored as
    ``result[k] = v``. Keys that do match are kept only when the first
    captured group is ``'addr'`` and the second captured group does not
    itself match ``lower_colon``; those values go into a nested
    ``result['address']`` dictionary keyed by the second group. The
    ``'address'`` entry exists only if at least one such key was seen.

    NOTE(review): the original statement nesting was lost in formatting;
    this assumes the ``else`` branch pairs with the outer ``if m:`` --
    confirm against the intended behaviour.
    '''
    result = {}
    for tag in tags:
        key = tag.attrib['k']
        value = tag.attrib['v']

        found = lower_colon.search(key)
        if not found:
            # No pattern match: store the tag as-is.
            result[key] = value
            continue

        if found.group(1) != 'addr':
            # Matching keys outside the 'addr' prefix are skipped.
            continue

        sub_key = found.group(2)
        if lower_colon.match(sub_key):
            # The remainder matches the pattern again; skipped.
            continue

        result.setdefault('address', {})[sub_key] = value
    return result