def _parse_digital_forms(self, prop=DIGITAL_FORMS):
        """
        Parses the Digital Form structures from the metadata and merges them by position.

        Base digital forms and transfer options are parsed from their own roots using the
        same XPATH map, and the two resulting lists are combined index by index.

        :param prop: the key identifying the complex definition and its XPATH map
        :return: the merged forms as returned by get_default_for_complex
        """

        form_xpaths = self._data_structures[prop]

        # Base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
        forms_root = self._data_map['_digital_forms_root']
        digital_forms = parse_complex_list(self._xml_tree, forms_root, form_xpaths, prop)

        # Transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
        options_root = self._data_map['_transfer_options_root']
        transfer_opts = parse_complex_list(self._xml_tree, options_root, form_xpaths, prop)

        # Content follows a delimiter line inside the specifications: separate the two
        for form in digital_forms:
            raw_specs = reduce_value(form['specification'])
            if isinstance(raw_specs, string_types):
                raw_specs = raw_specs.splitlines()

            spec_lines, content_lines = [], []
            target = spec_lines

            for line in wrap_value(s.strip() for s in raw_specs):
                if line == _DIGITAL_FORMS_CONTENT_DELIM:
                    # Delimiter lines are dropped; the first one redirects output to content
                    target = content_lines
                else:
                    target.append(line)

            # Reduce content and spec to single string values if possible
            form['content'] = reduce_value(filter_empty(content_lines, u''))
            form['specification'] = reduce_value(filter_empty(spec_lines, u''))

        # Merge digital forms and transfer options at matching indexes into one struct each
        form_count, option_count = len(digital_forms), len(transfer_opts)
        parsed_forms = []

        for idx in xrange(0, max(form_count, option_count)):
            merged = dict.fromkeys(_iso_definitions[prop], u'')

            # Only truthy values override the defaults; transfer options are applied last
            for source, count in ((digital_forms, form_count), (transfer_opts, option_count)):
                if idx < count:
                    merged.update((key, val) for key, val in source[idx].items() if val)

            if any(merged.values()):
                parsed_forms.append(merged)

        return get_default_for_complex(prop, parsed_forms)
    def _parse_digital_forms(self, prop=DIGITAL_FORMS):
        """
        Builds the list of Digital Form data structures parsed from the metadata.

        Digital forms and transfer options are read from separate roots with one XPATH map,
        then combined so that items at the same index end up in the same dictionary.

        :param prop: the key identifying the complex definition and its XPATH map
        :return: the combined forms as returned by get_default_for_complex
        """

        xpaths = self._data_structures[prop]

        # Base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
        df_root = self._data_map['_digital_forms_root']
        digital_forms = parse_complex_list(self._xml_tree, df_root, xpaths, prop)

        # Transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
        to_root = self._data_map['_transfer_options_root']
        transfer_opts = parse_complex_list(self._xml_tree, to_root, xpaths, prop)

        # Split each form's specification lines into specification and content parts

        delim = _DIGITAL_FORMS_CONTENT_DELIM

        for digital_form in digital_forms:
            specs = reduce_value(digital_form['specification'])
            if isinstance(specs, string_types):
                specs = specs.splitlines()

            lines = list(wrap_value(s.strip() for s in specs))

            if delim in lines:
                # Everything before the first delimiter is specification; the rest is
                # content, with any further delimiter lines discarded
                split_at = lines.index(delim)
                spec_part = lines[:split_at]
                content_part = [line for line in lines[split_at + 1:] if line != delim]
            else:
                spec_part, content_part = lines, []

            # Reduce content and spec to single string values if possible
            digital_form['content'] = reduce_value(filter_empty(content_part, u''))
            digital_form['specification'] = reduce_value(filter_empty(spec_part, u''))

        # Combine digital forms and transfer options into a single complex struct per index

        parsed_forms = []
        total = max(len(digital_forms), len(transfer_opts))

        for idx in xrange(0, total):
            combined = {}.fromkeys(_iso_definitions[prop], u'')

            # Empty values never overwrite; transfer option values are applied second
            if idx < len(digital_forms):
                combined.update({k: v for k, v in digital_forms[idx].items() if v})
            if idx < len(transfer_opts):
                combined.update({k: v for k, v in transfer_opts[idx].items() if v})

            if not any(combined.values()):
                continue

            parsed_forms.append(combined)

        return get_default_for_complex(prop, parsed_forms)
Пример #3
0
def update_complex(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Replaces the complex Element at xpath_root with one built from values.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of the root of the complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a Dictionary representing the complex structure to be updated
    :return: the result of update_property when values is empty; otherwise the
    element found at xpath_root once every sub-property has been written
    """

    # Start from a clean slate: whatever exists at the root is removed first
    remove_element(tree_to_update, xpath_root, True)

    struct = reduce_value(values, {})

    if not struct:
        # Nothing to write: delegate to update_property with the empty value
        return update_property(tree_to_update, xpath_root, xpath_root, prop, struct)

    for subprop, subval in iteritems(struct):
        subpath = xpath_map[subprop]
        update_property(
            tree_to_update, None, subpath, subprop,
            get_default_for_complex_sub(prop, subprop, subval, subpath)
        )

    return get_element(tree_to_update, xpath_root)
Пример #4
0
def update_complex(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Rewrites a complex Element in tree_to_update and returns the outcome.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of the root of the complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a Dictionary representing the complex structure to be updated
    :return: the element at xpath_root after the update when values were provided;
    otherwise whatever update_property returns for the empty value
    """

    # The existing structure is always removed before anything is written
    remove_element(tree_to_update, xpath_root, True)

    complex_values = reduce_value(values, {})

    if complex_values:
        # Write each sub-property at its own XPATH, after applying its default
        for key, item in iteritems(complex_values):
            target_xpath = xpath_map[key]
            prepared = get_default_for_complex_sub(prop, key, item, target_xpath)
            update_property(tree_to_update, None, target_xpath, key, prepared)

        result = get_element(tree_to_update, xpath_root)
    else:
        result = update_property(
            tree_to_update, xpath_root, xpath_root, prop, complex_values
        )

    return result
Пример #5
0
def _update_property(tree_to_update, xpath_root, xpaths, prop, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.

    :param tree_to_update: the tree (or element) in which elements are removed and inserted
    :param xpath_root: an optional XPATH prefix; when an xpath extends it, one root element
    is inserted per value and the value is written beneath that root
    :param xpaths: a single XPATH string, or a collection of them (reduced with reduce_value)
    :param prop: accepted for signature compatibility; not referenced in this function body
    :param values: the value or values to write; empty values are filtered out first
    :return: a list of the inserted elements, or of what was removed when there are no values
    """

    # Inner function to update a specific XPATH with the values provided

    def update_element(elem, idx, root, path, vals):
        """ Internal helper function to encapsulate single item update """

        # True only when root is non-empty and path is a strictly longer string starting with it.
        # Evaluated on the full path, before any '@attr' suffix is split off below.
        has_root = bool(root and len(path) > len(root) and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        # Step 1: clear what is currently stored at the target
        if attr:
            # Attribute target: keep the element, remove only the attribute from it
            removed = [get_element(elem, path)]
            remove_element_attributes(removed[0], attr)
        elif not has_root:
            # Plain element target: remove whatever is at path
            removed = wrap_value(remove_element(elem, path))
        else:
            # Rooted target: path becomes the branch passed to remove_element for each root
            # element (presumably relative to root -- confirm against get_xpath_branch).
            # Removal happens only on the first call (idx 0); later calls remove nothing.
            path = get_xpath_branch(root, path)
            removed = [] if idx != 0 else [remove_element(e, path, True) for e in get_elements(elem, root)]

        # With nothing to write, the update is a pure removal
        if not vals:
            return removed

        # Step 2: insert one item per value
        items = []

        for i, val in enumerate(wrap_value(vals)):
            elem_to_update = elem

            if has_root:
                # Insert a root element at position i + idx and write beneath it
                elem_to_update = insert_element(elem, (i + idx), root)

            # NOTE(review): any value that is not a string type is assumed to have .decode
            # (i.e. bytes); other types such as int would raise AttributeError here
            val = val.decode('utf-8') if not isinstance(val, string_types) else val
            if not attr:
                items.append(insert_element(elem_to_update, i, path, val))
            else:
                # Attribute target: the value is passed as a keyword argument named after attr
                items.append(insert_element(elem_to_update, i, path, **{attr: val}))

        return items

    # Code to update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, string_types):
        # A single xpath receives every value
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)
    else:
        each = []

        # Several xpaths: the xpath at each index receives the value at the same index.
        # values[index] is only guarded against empty values, not against a shorter sequence.
        for index, xpath in enumerate(xpaths):
            value = values[index] if values else None
            each.extend(update_element(tree_to_update, index, xpath_root, xpath, value))

        return each
Пример #6
0
def get_default_for_complex(prop, value, xpath=''):
    """
    Applies get_default_for_complex_sub to every sub-property of each structure in value.
    :param prop: the property identifying the complex definition
    :param value: a single structure or a collection of them (wrapped with wrap_value)
    :param xpath: passed through unchanged to get_default_for_complex_sub
    :return: the list of structures if prop is in _COMPLEX_LISTS; otherwise that list
    reduced with reduce_value, using an empty dict as the default
    """

    structs = []

    for struct in wrap_value(value):
        with_defaults = {}
        for sub_prop, sub_val in iteritems(struct):
            with_defaults[sub_prop] = get_default_for_complex_sub(prop, sub_prop, sub_val, xpath)
        structs.append(with_defaults)

    if prop in _COMPLEX_LISTS:
        return structs

    return reduce_value(structs, {})
Пример #7
0
def get_default_for_complex(prop, value, xpath=''):
    """
    Normalizes the sub-property values of one or more complex structures.
    :param prop: the property identifying the complex definition
    :param value: a single structure or a collection of them (wrapped with wrap_value)
    :param xpath: passed through unchanged to get_default_for_complex_sub
    :return: a list of structures for props in _COMPLEX_LISTS; for any other prop,
    the same list reduced with reduce_value (default: an empty dict)
    """

    def apply_defaults(struct):
        """ Rebuilds one structure with each sub-property value defaulted """
        return dict(
            (key, get_default_for_complex_sub(prop, key, sub, xpath))
            for key, sub in iteritems(struct)
        )

    defaults = [apply_defaults(struct) for struct in wrap_value(value)]

    if prop not in _COMPLEX_LISTS:
        defaults = reduce_value(defaults, {})

    return defaults
Пример #8
0
def get_default_for(prop, value):
    """
    Ensures complex property types have the correct default values.
    :param prop: the property name; underscores at either end are stripped before lookup
    :param value: the value to reduce and default
    :return: a wrapped value for props in _COMPLEX_LISTS, the value or an empty dict for
    props in _COMPLEX_STRUCTS, and otherwise the value with None replaced by u''
    """

    base_prop = prop.strip('_')     # Alternate props carry extra underscores
    reduced = reduce_value(value)   # Filtering of value happens here

    if base_prop in _COMPLEX_LISTS:
        return wrap_value(reduced)

    if base_prop in _COMPLEX_STRUCTS:
        return reduced if reduced else {}

    if reduced is None:
        return u''

    return reduced
Пример #9
0
def get_default_for(prop, value):
    """
    Returns value reduced and adjusted to the default expected for the type of prop.
    :param prop: the property name; underscores at either end are stripped before lookup
    :param value: the value to reduce and default
    :return: wrap_value of the reduced value if prop is in _COMPLEX_LISTS; the reduced value
    or an empty dict if prop is in _COMPLEX_STRUCTS; otherwise the reduced value, or u''
    when it is None
    """

    key = prop.strip('_')  # Alternate props carry extra underscores
    filtered = reduce_value(value)  # Filtering of value happens here

    if key in _COMPLEX_LISTS:
        result = wrap_value(filtered)
    elif key in _COMPLEX_STRUCTS:
        result = filtered or {}
    elif filtered is None:
        result = u''
    else:
        result = filtered

    return result
    def _parse_report_item(self, prop):
        """
        Collects the 'measDesc' text of report elements whose type attribute matches prop.

        For any prop other than 'attribute_accuracy' or 'dataset_completeness' the expected
        type is None, so only elements whose 'type' attribute lookup yields None will match.

        :param prop: the report property being parsed
        :return: the matching text values, reduced with reduce_value
        """

        item_type = {
            'attribute_accuracy': 'DQQuanAttAcc',
            'dataset_completeness': 'DQCompOm',
        }.get(prop)

        report_root = self._get_xroot_for(prop)

        # Lazily convert each element at the root, keeping the children of matching reports
        reports = (element_to_dict(elem) for elem in get_elements(self._xml_tree, report_root))
        matching = (rep['children'] for rep in reports if rep['attributes'].get('type') == item_type)

        descriptions = [child['text'] for child in flatten_items(matching) if child['name'] == 'measDesc']

        return reduce_value(descriptions)
    def _parse_report_item(self, prop):
        """
        Reads the text of each 'measDesc' child under report elements of the matching type.

        The type to match is 'DQQuanAttAcc' for 'attribute_accuracy' and 'DQCompOm' for
        'dataset_completeness'; for any other prop it is None, which matches only elements
        whose 'type' attribute lookup yields None.

        :param prop: the report property being parsed
        :return: the matching text values, reduced with reduce_value
        """

        if prop == 'attribute_accuracy':
            item_type = 'DQQuanAttAcc'
        elif prop == 'dataset_completeness':
            item_type = 'DQCompOm'
        else:
            item_type = None

        xroot = self._get_xroot_for(prop)
        elements = get_elements(self._xml_tree, xroot)

        element_dicts = (element_to_dict(e) for e in elements)
        children = flatten_items(
            d['children'] for d in element_dicts if d['attributes'].get('type') == item_type
        )

        texts = []
        for child in children:
            if child['name'] == 'measDesc':
                texts.append(child['text'])

        return reduce_value(texts)
Пример #12
0
def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Parses a single complex structure from the metadata into a Dictionary.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    :return: a dict with one entry per sub-property, or an empty dict if no entry is truthy
    """

    # Without a definition for complex_key, the keys of xpath_map are the sub-properties
    sub_props = _complex_definitions.get(complex_key, xpath_map)
    parsed_struct = {}

    for sub_prop in sub_props:
        raw = parse_property(tree_to_parse, xpath_root, xpath_map, sub_prop)

        # Normalize: a value containing _COMPLEX_DELIM is treated like several separate values
        pieces = (item.split(_COMPLEX_DELIM) for item in wrap_value(raw))
        normalized = reduce_value(flatten_items(pieces))

        parsed_struct[sub_prop] = get_default_for_complex_sub(
            complex_key, sub_prop, normalized, xpath_map[sub_prop]
        )

    if any(parsed_struct.values()):
        return parsed_struct

    return {}
Пример #13
0
def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Builds a Dictionary for one complex structure parsed from the metadata.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    :return: the parsed structure keyed by sub-property; an empty dict when every value is falsy
    """

    def split_values(parsed):
        """ Splits each parsed value on _COMPLEX_DELIM and reduces the flattened result """
        return reduce_value(
            flatten_items(val.split(_COMPLEX_DELIM) for val in wrap_value(parsed))
        )

    struct = {}

    # Falls back to iterating the keys of xpath_map when complex_key has no definition
    for key in _complex_definitions.get(complex_key, xpath_map):
        values = split_values(parse_property(tree_to_parse, xpath_root, xpath_map, key))
        struct[key] = get_default_for_complex_sub(complex_key, key, values, xpath_map[key])

    # A structure with no truthy sub-property is reported as empty
    return struct if any(struct.values()) else {}
Пример #14
0
def _update_property(tree_to_update, xpath_root, xpaths, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.

    :param tree_to_update: the tree (or element) in which elements are removed and inserted
    :param xpath_root: an optional XPATH prefix; when an xpath extends it, one root element
    is inserted per value and the value is written beneath that root
    :param xpaths: a single XPATH string, or a collection of them (reduced with reduce_value)
    :param values: the value or values to write; empty values are filtered out first
    :return: a list of the inserted or updated elements, or of what was removed when there
    are no values
    """

    # Inner function to update a specific XPATH with the values provided

    def update_element(elem, idx, root, path, vals):
        """ Internal helper function to encapsulate single item update """

        # True only when root is non-empty and path is a strictly longer string starting with it.
        # Evaluated on the full path, before any '@attr' suffix is split off below.
        has_root = bool(root and len(path) > len(root)
                        and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        # Step 1: clear what is currently stored at the target
        if attr:
            # Attribute target: keep the element, remove only the attribute from it
            removed = [get_element(elem, path)]
            remove_element_attributes(removed[0], attr)
        elif not has_root:
            # Plain element target: remove whatever is at path
            removed = wrap_value(remove_element(elem, path))
        else:
            # Rooted target: path becomes the branch passed to remove_element for each root
            # element (presumably relative to root -- confirm against get_xpath_branch).
            # Removal happens only on the first call (idx 0); later calls remove nothing.
            path = get_xpath_branch(root, path)
            removed = [] if idx != 0 else [
                remove_element(e, path, True)
                for e in get_elements(elem, root)
            ]

        # With nothing to write, the update is a pure removal
        if not vals:
            return removed

        # Step 2: insert one item per value
        items = []

        for i, val in enumerate(wrap_value(vals)):
            elem_to_update = elem

            if has_root:
                # Insert a root element at position i + idx and write beneath it
                elem_to_update = insert_element(elem, (i + idx), root)

            # NOTE(review): any value that is not a string type is assumed to have .decode
            # (i.e. bytes); other types such as int would raise AttributeError here
            val = val.decode('utf-8') if not isinstance(val,
                                                        string_types) else val
            if not attr:
                items.append(insert_element(elem_to_update, i, path, val))
            elif path:
                # Attribute on a child element: insert the element with the attribute set
                items.append(
                    insert_element(elem_to_update, i, path, **{attr: val}))
            else:
                # Attribute with an empty path: set it directly on the element being updated
                set_element_attributes(elem_to_update, **{attr: val})
                items.append(elem_to_update)

        return items

    # Code to update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, string_types):
        # A single xpath receives every value
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)
    else:
        each = []

        # Several xpaths: the xpath at each index receives the value at the same index.
        # values[index] is only guarded against empty values, not against a shorter sequence.
        for index, xpath in enumerate(xpaths):
            value = values[index] if values else None
            each.extend(
                update_element(tree_to_update, index, xpath_root, xpath,
                               value))

        return each