def _parse_digital_forms(self, prop=DIGITAL_FORMS):
    """
    Builds the list of Digital Form structures by merging two parsed complex lists.

    Both lists are parsed from self._xml_tree with the same XPATH map
    (self._data_structures[prop]) but from two different roots in self._data_map:
    '_digital_forms_root' and '_transfer_options_root'. Items are merged by position.

    :param prop: key used for the XPATH map, the _iso_definitions entry and the defaults
    :return: the merged forms after being passed through get_default_for_complex
    """

    form_xpaths = self._data_structures[prop]

    # Base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
    forms_root = self._data_map['_digital_forms_root']
    forms = parse_complex_list(self._xml_tree, forms_root, form_xpaths, prop)

    # Transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
    options_root = self._data_map['_transfer_options_root']
    options = parse_complex_list(self._xml_tree, options_root, form_xpaths, prop)

    # Content is stored appended to the specifications, after a delimiter line:
    # everything before the delimiter is specification, everything after is content

    delimiter = _DIGITAL_FORMS_CONTENT_DELIM

    for form in forms:
        raw_specs = reduce_value(form['specification'])
        if isinstance(raw_specs, string_types):
            raw_specs = raw_specs.splitlines()

        content_lines, spec_lines = [], []
        delim_seen = False

        for line in wrap_value(s.strip() for s in raw_specs):
            if line == delimiter:
                # The delimiter itself is never kept; it only switches the target list
                delim_seen = True
            elif delim_seen:
                content_lines.append(line)
            else:
                spec_lines.append(line)

        # Reduce content and spec to single string values if possible
        form['content'] = reduce_value(filter_empty(content_lines, u''))
        form['specification'] = reduce_value(filter_empty(spec_lines, u''))

    # Combine digital forms and transfer options, index by index, into single structs

    form_count, option_count = len(forms), len(options)
    merged = []

    for idx in xrange(max(form_count, option_count)):
        combined = dict.fromkeys(_iso_definitions[prop], u'')

        # Only truthy values replace the blank defaults; transfer options are applied last
        for source, count in ((forms, form_count), (options, option_count)):
            if idx < count:
                combined.update((key, val) for key, val in source[idx].items() if val)

        # Skip positions where neither source contributed anything
        if any(combined.values()):
            merged.append(combined)

    return get_default_for_complex(prop, merged)
def update_complex(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Rewrites the complex Element at xpath_root from values and returns the result.

    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of the root of the complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a Dictionary representing the complex structure to be updated
    :return: the result of update_property when values is empty; otherwise
    the Element found at xpath_root after every sub-property has been written
    """

    # Always clear what is currently at the root before writing anything
    remove_element(tree_to_update, xpath_root, True)

    struct = reduce_value(values, {})

    if not struct:
        # Nothing to write: hand the root to update_property as both root and XPATH
        return update_property(tree_to_update, xpath_root, xpath_root, prop, struct)

    for subprop, subval in iteritems(struct):
        sub_xpath = xpath_map[subprop]
        update_property(
            tree_to_update, None, sub_xpath, subprop,
            get_default_for_complex_sub(prop, subprop, subval, sub_xpath)
        )

    return get_element(tree_to_update, xpath_root)
def _update_property(tree_to_update, xpath_root, xpaths, prop, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.

    :param tree_to_update: the XML tree (or element) in which values are written
    :param xpath_root: optional root XPATH; when an xpath extends it, a root element
    is inserted for each value and the value is written beneath it
    :param xpaths: a single XPATH or a collection of them; may end in '/@attr'
    :param prop: the property being updated (accepted for interface parity, not read here)
    :param values: the value or values to write; when nothing remains after filtering,
    existing content is only removed
    :return: a list of the inserted (or attribute-updated) elements, or whatever was
    collected during removal when there is nothing to write
    """

    # Inner function to update a specific XPATH with the values provided

    def update_element(elem, idx, root, path, vals):
        """ Internal helper function to encapsulate single item update """

        # True only when path is strictly longer than root and begins with it
        has_root = bool(root and len(path) > len(root) and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        if attr:
            # Attribute target: clear the attribute but leave the element in place
            removed = [get_element(elem, path)]
            remove_element_attributes(removed[0], attr)
        elif not has_root:
            removed = wrap_value(remove_element(elem, path))
        else:
            # Work relative to each root element; removal happens once, on the first xpath
            path = get_xpath_branch(root, path)
            removed = [] if idx != 0 else [remove_element(e, path, True) for e in get_elements(elem, root)]

        if not vals:
            return removed

        items = []

        for i, val in enumerate(wrap_value(vals)):
            elem_to_update = elem

            if has_root:
                elem_to_update = insert_element(elem, (i + idx), root)

            # Anything that is not already text is expected to be utf-8 encoded bytes
            val = val.decode('utf-8') if not isinstance(val, string_types) else val

            if not attr:
                items.append(insert_element(elem_to_update, i, path, val))
            elif path:
                items.append(insert_element(elem_to_update, i, path, **{attr: val}))
            else:
                # Attribute-only XPATH: there is no element path to insert at,
                # so the attribute is written on the element being updated
                set_element_attributes(elem_to_update, **{attr: val})
                items.append(elem_to_update)

        return items

    # Code to update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, string_types):
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)
    else:
        each = []

        for index, xpath in enumerate(xpaths):
            # Values are matched to xpaths by position
            value = values[index] if values else None
            each.extend(update_element(tree_to_update, index, xpath_root, xpath, value))

        return each
def get_default_for_complex(prop, value, xpath=''):
    """
    Applies get_default_for_complex_sub to every sub-property of every struct in value.

    :param prop: the complex property the structs belong to
    :param value: one struct or a collection of structs (wrapped with wrap_value)
    :param xpath: passed through unchanged to get_default_for_complex_sub
    :return: the list of structs when prop is in _COMPLEX_LISTS;
    otherwise the list reduced with reduce_value, using {} as the default
    """

    # Ensure sub-props of complex structs and complex lists that take multiple values are wrapped as lists
    structs = []
    for struct in wrap_value(value):
        structs.append({
            key: get_default_for_complex_sub(prop, key, sub_val, xpath)
            for key, sub_val in iteritems(struct)
        })

    if prop in _COMPLEX_LISTS:
        return structs

    return reduce_value(structs, {})
def get_default_for_complex(prop, value, xpath=''):
    """
    Normalizes the sub-properties of each complex struct in value.

    :param prop: the complex property the structs belong to
    :param value: one struct or a collection of structs (wrapped with wrap_value)
    :param xpath: passed through unchanged to get_default_for_complex_sub
    :return: a list of structs if prop is in _COMPLEX_LISTS;
    otherwise reduce_value of that list with {} as the default
    """

    def with_sub_defaults(struct):
        """ Rebuilds one struct with each sub-property run through get_default_for_complex_sub """
        return {
            name: get_default_for_complex_sub(prop, name, item, xpath)
            for name, item in iteritems(struct)
        }

    # Ensure sub-props of complex structs and complex lists that take multiple values are wrapped as lists
    normalized = [with_sub_defaults(struct) for struct in wrap_value(value)]

    return normalized if prop in _COMPLEX_LISTS else reduce_value(normalized, {})
def get_default_for(prop, value):
    """
    Ensures complex property types have the correct default values.

    :param prop: the property name; underscores are trimmed before it is looked up
    :param value: the value to normalize (reduced with reduce_value first)
    :return: wrap_value of the reduced value for props in _COMPLEX_LISTS; the reduced
    value or {} for props in _COMPLEX_STRUCTS; otherwise the reduced value, with None
    replaced by an empty unicode string
    """

    # Alternate props carry underscores; strip('_') trims them from both ends
    key = prop.strip('_')

    # Filtering of value happens here
    reduced = reduce_value(value)

    if key in _COMPLEX_LISTS:
        return wrap_value(reduced)

    if key in _COMPLEX_STRUCTS:
        return reduced or {}

    if reduced is None:
        return u''

    return reduced
def _parse_report_item(self, prop):
    """
    Collects 'measDesc' text from the children of elements whose type attribute matches prop.

    :param prop: 'attribute_accuracy' or 'dataset_completeness'; for any other value
    the expected type is None, so only elements without a 'type' attribute match
    :return: the reduced list of text values for matching 'measDesc' children
    """

    type_for_prop = {
        'attribute_accuracy': 'DQQuanAttAcc',
        'dataset_completeness': 'DQCompOm'
    }
    expected_type = type_for_prop.get(prop)

    xroot = self._get_xroot_for(prop)

    # Lazily convert each element at the root, keeping the children of matching ones
    as_dicts = (element_to_dict(e) for e in get_elements(self._xml_tree, xroot))
    children = flatten_items(
        item['children'] for item in as_dicts if item['attributes'].get('type') == expected_type
    )

    descriptions = [child['text'] for child in children if child['name'] == 'measDesc']

    return reduce_value(descriptions)
def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Creates and returns a Dictionary data structure parsed from the metadata.

    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    :return: the parsed structure, or an empty dict when none of its values is truthy
    """

    parsed_struct = {}

    # Fall back to the keys of xpath_map when there is no definition for complex_key
    sub_props = _complex_definitions.get(complex_key, xpath_map)

    for sub_prop in sub_props:
        raw = parse_property(tree_to_parse, xpath_root, xpath_map, sub_prop)

        # Normalize complex values: a value containing the delimiter is
        # treated like several values coming from separate elements
        pieces = flatten_items(v.split(_COMPLEX_DELIM) for v in wrap_value(raw))

        parsed_struct[sub_prop] = get_default_for_complex_sub(
            complex_key, sub_prop, reduce_value(pieces), xpath_map[sub_prop]
        )

    if any(parsed_struct.values()):
        return parsed_struct

    return {}
def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Parses one complex structure from the metadata into a Dictionary.

    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    :return: a dict keyed by sub-property, or {} if every parsed value is falsy
    """

    def parse_sub_prop(name):
        """ Parses a single sub-property and applies its complex default """

        found = parse_property(tree_to_parse, xpath_root, xpath_map, name)

        # Normalize complex values: split each on the delimiter so that delimited
        # values are handled like values from separate elements
        split_up = flatten_items(v.split(_COMPLEX_DELIM) for v in wrap_value(found))
        found = reduce_value(split_up)

        return get_default_for_complex_sub(complex_key, name, found, xpath_map[name])

    result = {}

    # Without a definition for complex_key, iterate the keys of xpath_map instead
    for name in _complex_definitions.get(complex_key, xpath_map):
        result[name] = parse_sub_prop(name)

    return result if any(result.values()) else {}
def _update_property(tree_to_update, xpath_root, xpaths, values):
    """
    Default update operation for a single parser property.

    With a single xpath, one element per value is inserted at that location in
    tree_to_update; with several xpaths, values are matched to them by position,
    so the number of values must match the number of xpaths.

    :param tree_to_update: the XML tree (or element) in which values are written
    :param xpath_root: optional root XPATH; when an xpath extends it, a root element
    is inserted for each value and the value is written beneath it
    :param xpaths: a single XPATH or a collection of them; may end in '/@attr'
    :param values: the value or values to write; when nothing remains after filtering,
    existing content is only removed
    :return: a list of the inserted (or attribute-updated) elements, or whatever was
    collected during removal when there is nothing to write
    """

    def update_element(elem, idx, root, path, vals):
        """ Applies the update for one XPATH: clear existing content, then write vals """

        # True only when path is strictly longer than root and begins with it
        has_root = bool(root and len(path) > len(root) and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        if attr:
            # Attribute target: clear the attribute but leave the element in place
            target = get_element(elem, path)
            remove_element_attributes(target, attr)
            removed = [target]
        elif has_root:
            # Work relative to each root element; removal happens once, on the first xpath
            path = get_xpath_branch(root, path)
            if idx == 0:
                removed = [remove_element(e, path, True) for e in get_elements(elem, root)]
            else:
                removed = []
        else:
            removed = wrap_value(remove_element(elem, path))

        if not vals:
            return removed

        written = []

        for offset, val in enumerate(wrap_value(vals)):
            parent = insert_element(elem, (offset + idx), root) if has_root else elem

            # Anything that is not already text is expected to be utf-8 encoded bytes
            if not isinstance(val, string_types):
                val = val.decode('utf-8')

            if not attr:
                written.append(insert_element(parent, offset, path, val))
            elif path:
                written.append(insert_element(parent, offset, path, **{attr: val}))
            else:
                # Attribute-only XPATH: set the attribute on the parent itself
                set_element_attributes(parent, **{attr: val})
                written.append(parent)

        return written

    # Update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, string_types):
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)

    updated = []

    for position, xpath in enumerate(xpaths):
        current = values[position] if values else None
        updated.extend(update_element(tree_to_update, position, xpath_root, xpath, current))

    return updated