Пример #1
0
def get_parsed_content(metadata_content):
    """
    Converts supported metadata content into an XML tree and validates its root.

    The content may be any of the following:
    1. XML string or file object: parsed as XML content
    2. MetadataParser instance: its _xml_tree is deep copied
    3. Dictionary with nested objects containing:
        - name (required): the name of the element tag
        - text: the text contained by element
        - tail: text immediately following the element
        - attributes: a Dictionary containing element attributes
        - children: a List of converted child elements

    Errors raised while converting a Dictionary are not intercepted here; for XML strings
    or file objects, any error during parsing is reported as InvalidContent instead.

    :raises InvalidContent: if the content cannot be parsed, or if its root is not in VALID_ROOTS
    :raises NoContent: if the content passed in is None, or if the parsed tree has no root name

    :return: a tuple containing the XML root name followed by the parsed XML tree
    """

    _import_parsers()  # Deferred to prevent circular dependencies between modules

    if metadata_content is None:
        raise NoContent('Metadata has no data')

    content_type = type(metadata_content).__name__

    if isinstance(metadata_content, MetadataParser):
        xml_tree = deepcopy(metadata_content._xml_tree)
    elif isinstance(metadata_content, dict):
        xml_tree = get_element_tree(metadata_content)
    else:
        # File or XML content: several exceptions are possible, but the outcome is the same
        try:
            xml_tree = get_element_tree(metadata_content)
        except Exception:
            xml_tree = None

    if xml_tree is None:
        raise InvalidContent(
            'Cannot instantiate a {parser_type} parser with invalid content to parse',
            parser_type=content_type
        )

    xml_root = get_element_name(xml_tree)

    if xml_root is None:
        raise NoContent('Metadata contains no data')

    if xml_root not in VALID_ROOTS:
        raise InvalidContent(
            'Invalid root element for {content}: {xml_root}',
            content=content_type,
            xml_root=xml_root
        )

    return xml_root, xml_tree
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required ISO-19115 data map with XPATHS and specialized functions

        Does nothing when self._data_map has already been assigned. Otherwise the XML root is
        validated against ISO_ROOTS (the first of them is used when there is no XML tree), and
        both self._data_map and self._data_structures are populated.

        :raises InvalidContent: if the root of the XML tree is not in ISO_ROOTS
        """

        if self._data_map is not None:
            return  # Initialization only needs to happen once

        # Resolve and validate the ISO metadata root

        root = ISO_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)

        if root not in ISO_ROOTS:
            raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=root)

        data_map = {'_root': root}
        data_map.update(_iso_tag_roots)
        data_map.update(_iso_tag_formats)

        structures = {}

        # Build the XPATH structures for each of the complex properties

        attr_xpath = data_map[ATTRIBUTES]
        attr_source = data_map['_attr_src']
        feature_source = attr_source.replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

        structures[ATTRIBUTES] = format_xpaths(
            _iso_definitions[ATTRIBUTES],

            label=attr_xpath.format(ad_path='memberName/LocalName'),
            aliases=attr_xpath.format(ad_path='aliases/LocalName'),  # Not in spec
            definition=attr_xpath.format(ad_path='definition/CharacterString'),

            # Attribute definition source is first looked for under FC_FeatureAttribute
            definition_src=attr_source + '/organisationName/CharacterString',
            _definition_src=attr_source + '/individualName/CharacterString',

            # Otherwise the feature type source is assumed to apply: look under FC_FeatureType
            __definition_src=feature_source + '/organisationName/CharacterString',
            ___definition_src=feature_source + '/individualName/CharacterString'
        )

        bbox_xpath = data_map[BOUNDING_BOX]
        structures[BOUNDING_BOX] = format_xpaths(
            _iso_definitions[BOUNDING_BOX],
            east=bbox_xpath.format(bbox_path='eastBoundLongitude/Decimal'),
            south=bbox_xpath.format(bbox_path='southBoundLatitude/Decimal'),
            west=bbox_xpath.format(bbox_path='westBoundLongitude/Decimal'),
            north=bbox_xpath.format(bbox_path='northBoundLatitude/Decimal')
        )

        contact_xpath = data_map[CONTACTS]
        structures[CONTACTS] = format_xpaths(
            _iso_definitions[CONTACTS],
            name=contact_xpath.format(ct_path='individualName/CharacterString'),
            organization=contact_xpath.format(ct_path='organisationName/CharacterString'),
            position=contact_xpath.format(ct_path='positionName/CharacterString'),
            email=contact_xpath.format(
                ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
            )
        )

        date_xpath = data_map[DATES]
        date_structure = {
            DATE_TYPE_MULTIPLE: date_xpath.format(type_path='TimeInstant/timePosition'),
            DATE_TYPE_RANGE_BEGIN: date_xpath.format(type_path='TimePeriod/begin/TimeInstant/timePosition'),
            DATE_TYPE_RANGE_END: date_xpath.format(type_path='TimePeriod/end/TimeInstant/timePosition'),
            DATE_TYPE_SINGLE: date_xpath.format(type_path='TimeInstant/timePosition')  # Same as multiple
        }
        # A date range is described by the pair of its begin and end XPATHs
        date_structure[DATE_TYPE_RANGE] = [
            date_structure[DATE_TYPE_RANGE_BEGIN],
            date_structure[DATE_TYPE_RANGE_END]
        ]
        structures[DATES] = date_structure

        form_xpath = data_map[DIGITAL_FORMS]
        structures[DIGITAL_FORMS] = format_xpaths(
            _iso_definitions[DIGITAL_FORMS],
            name=form_xpath.format(df_path='name/CharacterString'),
            content='',  # Not supported in ISO-19115 (appending to spec)
            decompression=form_xpath.format(df_path='fileDecompressionTechnique/CharacterString'),
            version=form_xpath.format(df_path='version/CharacterString'),
            specification=form_xpath.format(df_path='specification/CharacterString'),
            access_desc=data_map['_access_desc'],
            access_instrs=data_map['_access_instrs'],
            network_resource=data_map['_network_resource']
        )

        keywords_structure = {
            'keyword_root': 'MD_Keywords/keyword',
            'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
            'keyword': 'MD_Keywords/keyword/CharacterString'
        }
        # Every keyword property receives its own copy of the structure
        structures.update((kw_prop, deepcopy(keywords_structure)) for kw_prop in KEYWORD_PROPS)

        work_xpath = data_map[LARGER_WORKS]
        structures[LARGER_WORKS] = format_xpaths(
            _iso_definitions[LARGER_WORKS],
            title=work_xpath.format(lw_path='title/CharacterString'),
            edition=work_xpath.format(lw_path='edition/CharacterString'),
            origin=data_map['_lw_citation'].format(lw_path='individualName/CharacterString'),
            online_linkage=data_map['_lw_linkage'].format(lw_path='linkage/URL'),
            other_citation=work_xpath.format(lw_path='otherCitationDetails/CharacterString'),
            date=work_xpath.format(lw_path='editionDate/Date'),
            place=data_map['_lw_contact'].format(lw_path='address/CI_Address/city/CharacterString'),
            info=data_map['_lw_citation'].format(lw_path='organisationName/CharacterString')
        )

        step_xpath = data_map[PROCESS_STEPS]
        structures[PROCESS_STEPS] = format_xpaths(
            _iso_definitions[PROCESS_STEPS],
            description=step_xpath.format(ps_path='description/CharacterString'),
            date=step_xpath.format(ps_path='dateTime/DateTime'),
            sources=step_xpath.format(
                ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
            )
        )

        raster_xpath = data_map[RASTER_INFO]
        structures[RASTER_INFO] = format_xpaths(
            _iso_definitions[RASTER_DIMS],
            type=raster_xpath.format(ri_path='dimensionName/MD_DimensionNameTypeCode'),
            _type=raster_xpath.format(ri_path='dimensionName/MD_DimensionNameTypeCode/@codeListValue'),
            size=raster_xpath.format(ri_path='dimensionSize/Integer'),
            value=raster_xpath.format(ri_path='resolution/Measure'),
            units=raster_xpath.format(ri_path='resolution/Measure/@uom')
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to data map:
        # rules are checked in order, and the first one containing a property applies

        complex_props = (
            ((ATTRIBUTES,), self._parse_attribute_details, self._update_attribute_details),
            ((CONTACTS, PROCESS_STEPS), self._parse_complex_list, self._update_complex_list),
            ((BOUNDING_BOX, LARGER_WORKS), self._parse_complex, self._update_complex),
            ((DATES,), self._parse_dates, self._update_dates),
            ((DIGITAL_FORMS,), self._parse_digital_forms, self._update_digital_forms),
            (KEYWORD_PROPS, self._parse_keywords, self._update_keywords),
            ((RASTER_INFO,), self._parse_raster_info, self._update_raster_info)
        )

        # Iterate over a copy, given that the data map is reassigned along the way
        for prop, xpath in iteritems(dict(data_map)):
            for props, parser, updater in complex_props:
                if prop in props:
                    data_map[prop] = ParserProperty(parser, updater)
                    break
            else:
                data_map[prop] = xpath  # Simple properties keep their XPATH

        self._data_map = data_map
        self._data_structures = structures
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions

        Does nothing when self._data_map has already been assigned. Otherwise the XML root is
        validated against FGDC_ROOT (which is assumed when there is no XML tree), and both
        self._data_map and self._data_structures are populated.

        :raises InvalidContent: if the root of the XML tree is not FGDC_ROOT
        """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the FGDC metadata root

        if self._xml_tree is None:
            fgdc_root = FGDC_ROOT
        else:
            fgdc_root = get_element_name(self._xml_tree)

        if fgdc_root != FGDC_ROOT:
            # The message names the FGDC standard, this being the FGDC parser (it once said ISO-19115)
            raise InvalidContent('Invalid XML root for FGDC standard: {root}', root=fgdc_root)

        fgdc_data_map = {'_root': FGDC_ROOT}
        fgdc_data_structures = {}

        # Capture and format other complex XPATHs

        ad_format = _fgdc_tag_formats[ATTRIBUTES]
        fgdc_data_structures[ATTRIBUTES] = format_xpaths(
            _fgdc_definitions[ATTRIBUTES],
            label=ad_format.format(ad_path='attrlabl'),
            aliases=ad_format.format(ad_path='attalias'),
            definition=ad_format.format(ad_path='attrdef'),
            definition_src=ad_format.format(ad_path='attrdefs')
        )

        bb_format = _fgdc_tag_formats[BOUNDING_BOX]
        fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
            _fgdc_definitions[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastbc'),
            south=bb_format.format(bbox_path='southbc'),
            west=bb_format.format(bbox_path='westbc'),
            north=bb_format.format(bbox_path='northbc')
        )

        ct_format = _fgdc_tag_formats[CONTACTS]
        fgdc_data_structures[CONTACTS] = format_xpaths(
            _fgdc_definitions[CONTACTS],

            name=ct_format.format(ct_path='cntperp/cntper'),
            _name=ct_format.format(ct_path='cntorgp/cntper'),  # If not in cntperp

            organization=ct_format.format(ct_path='cntperp/cntorg'),
            _organization=ct_format.format(ct_path='cntorgp/cntorg'),  # If not in cntperp

            position=ct_format.format(ct_path='cntpos'),
            email=ct_format.format(ct_path='cntemail')
        )

        dt_format = _fgdc_tag_formats[DATES]
        fgdc_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
            DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
        }
        # A date range is described by the pair of its begin and end XPATHs
        fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]

        df_format = _fgdc_tag_formats[DIGITAL_FORMS]
        fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
            _fgdc_definitions[DIGITAL_FORMS],
            name=df_format.format(df_path='digtinfo/formname'),
            content=df_format.format(df_path='digtinfo/formcont'),
            decompression=df_format.format(df_path='digtinfo/filedec'),
            version=df_format.format(df_path='digtinfo/formvern'),
            specification=df_format.format(df_path='digtinfo/formspec'),
            access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
            access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
            network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
        )

        lw_format = _fgdc_tag_formats[LARGER_WORKS]
        fgdc_data_structures[LARGER_WORKS] = format_xpaths(
            _fgdc_definitions[LARGER_WORKS],
            title=lw_format.format(lw_path='title'),
            edition=lw_format.format(lw_path='edition'),
            origin=lw_format.format(lw_path='origin'),
            online_linkage=lw_format.format(lw_path='onlink'),
            other_citation=lw_format.format(lw_path='othercit'),
            date=lw_format.format(lw_path='pubdate'),
            place=lw_format.format(lw_path='pubinfo/pubplace'),
            info=lw_format.format(lw_path='pubinfo/publish')
        )

        ps_format = _fgdc_tag_formats[PROCESS_STEPS]
        fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
            _fgdc_definitions[PROCESS_STEPS],
            description=ps_format.format(ps_path='procdesc'),
            date=ps_format.format(ps_path='procdate'),
            sources=ps_format.format(ps_path='srcused')
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map:
        # the tag formats are only read here, so they are iterated without being copied

        for prop, xpath in iteritems(_fgdc_tag_formats):
            if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop == DATES:
                fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            else:
                fgdc_data_map[prop] = xpath  # Simple properties map directly to their XPATH

        self._data_map = fgdc_data_map
        self._data_structures = fgdc_data_structures
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required ISO-19115 data map with XPATHS and specialized functions

        Does nothing when self._data_map has already been assigned. Otherwise the XML root is
        validated against ISO_ROOTS (the first of them is used when there is no XML tree), and
        both self._data_map and self._data_structures are populated.

        :raises InvalidContent: if the root of the XML tree is not in ISO_ROOTS
        """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the ISO metadata root

        if self._xml_tree is None:
            iso_root = ISO_ROOTS[0]
        else:
            iso_root = get_element_name(self._xml_tree)

        if iso_root not in ISO_ROOTS:
            raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=iso_root)

        iso_data_map = {'_root': iso_root}
        iso_data_map.update(_iso_tag_roots)
        iso_data_map.update(_iso_tag_formats)

        iso_data_structures = {}

        # Capture and format complex XPATHs

        ad_format = iso_data_map[ATTRIBUTES]
        ft_source = iso_data_map['_attr_src'].replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

        iso_data_structures[ATTRIBUTES] = format_xpaths(
            _iso_definitions[ATTRIBUTES],
            label=ad_format.format(ad_path='memberName/LocalName'),
            aliases=ad_format.format(ad_path='aliases/LocalName'),  # Not in spec
            definition=ad_format.format(ad_path='definition/CharacterString'),

            # First try to populate attribute definition source from FC_FeatureAttribute
            definition_src=iso_data_map['_attr_src'] + '/organisationName/CharacterString',
            _definition_src=iso_data_map['_attr_src'] + '/individualName/CharacterString',

            # Then assume feature type source is the same as attribute: populate from FC_FeatureType
            __definition_src=ft_source + '/organisationName/CharacterString',
            ___definition_src=ft_source + '/individualName/CharacterString'
        )

        bb_format = iso_data_map[BOUNDING_BOX]
        iso_data_structures[BOUNDING_BOX] = format_xpaths(
            _iso_definitions[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastBoundLongitude/Decimal'),
            south=bb_format.format(bbox_path='southBoundLatitude/Decimal'),
            west=bb_format.format(bbox_path='westBoundLongitude/Decimal'),
            north=bb_format.format(bbox_path='northBoundLatitude/Decimal')
        )

        ct_format = iso_data_map[CONTACTS]
        iso_data_structures[CONTACTS] = format_xpaths(
            _iso_definitions[CONTACTS],
            name=ct_format.format(ct_path='individualName/CharacterString'),
            organization=ct_format.format(ct_path='organisationName/CharacterString'),
            position=ct_format.format(ct_path='positionName/CharacterString'),
            email=ct_format.format(
                ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
            )
        )

        dt_format = iso_data_map[DATES]
        iso_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='TimeInstant/timePosition'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TimePeriod/begin/TimeInstant/timePosition'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='TimePeriod/end/TimeInstant/timePosition'),
            DATE_TYPE_SINGLE: dt_format.format(type_path='TimeInstant/timePosition')  # Same as multiple
        }
        # A date range is described by the pair of its begin and end XPATHs
        iso_data_structures[DATES][DATE_TYPE_RANGE] = [
            iso_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            iso_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]

        df_format = iso_data_map[DIGITAL_FORMS]
        iso_data_structures[DIGITAL_FORMS] = format_xpaths(
            _iso_definitions[DIGITAL_FORMS],
            name=df_format.format(df_path='name/CharacterString'),
            content='',  # Not supported in ISO-19115 (appending to spec)
            decompression=df_format.format(df_path='fileDecompressionTechnique/CharacterString'),
            version=df_format.format(df_path='version/CharacterString'),
            specification=df_format.format(df_path='specification/CharacterString'),
            access_desc=iso_data_map['_access_desc'],
            access_instrs=iso_data_map['_access_instrs'],
            network_resource=iso_data_map['_network_resource']
        )

        keywords_structure = {
            'keyword_root': 'MD_Keywords/keyword',
            'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
            'keyword': 'MD_Keywords/keyword/CharacterString'
        }
        # Each keyword property gets its own dictionary, so that changing the structure of one
        # cannot alter the other (the values are strings, so a shallow copy is sufficient)
        iso_data_structures[KEYWORDS_PLACE] = dict(keywords_structure)
        iso_data_structures[KEYWORDS_THEME] = dict(keywords_structure)

        lw_format = iso_data_map[LARGER_WORKS]
        iso_data_structures[LARGER_WORKS] = format_xpaths(
            _iso_definitions[LARGER_WORKS],
            title=lw_format.format(lw_path='title/CharacterString'),
            edition=lw_format.format(lw_path='edition/CharacterString'),
            origin=iso_data_map['_lw_citation'].format(lw_path='individualName/CharacterString'),
            online_linkage=iso_data_map['_lw_linkage'].format(lw_path='linkage/URL'),
            other_citation=lw_format.format(lw_path='otherCitationDetails/CharacterString'),
            date=lw_format.format(lw_path='editionDate/Date'),
            place=iso_data_map['_lw_contact'].format(lw_path='address/CI_Address/city/CharacterString'),
            info=iso_data_map['_lw_citation'].format(lw_path='organisationName/CharacterString')
        )

        ps_format = iso_data_map[PROCESS_STEPS]
        iso_data_structures[PROCESS_STEPS] = format_xpaths(
            _iso_definitions[PROCESS_STEPS],
            description=ps_format.format(ps_path='description/CharacterString'),
            date=ps_format.format(ps_path='dateTime/DateTime'),
            sources=ps_format.format(
                ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
            )
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to data map:
        # a copy is iterated, given that the data map is reassigned along the way

        for prop, xpath in iteritems(dict(iso_data_map)):
            if prop == ATTRIBUTES:
                iso_data_map[prop] = ParserProperty(self._parse_attribute_details, self._update_attribute_details)

            elif prop in (CONTACTS, PROCESS_STEPS):
                iso_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                iso_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop == DATES:
                iso_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            elif prop == DIGITAL_FORMS:
                iso_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms)

            elif prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
                iso_data_map[prop] = ParserProperty(self._parse_keywords, self._update_keywords)

            else:
                iso_data_map[prop] = xpath  # Simple properties keep their XPATH

        self._data_map = iso_data_map
        self._data_structures = iso_data_structures
Пример #5
0
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required ArcGIS data map with XPATHS and specialized functions

        Does nothing when self._data_map has already been assigned. Otherwise the XML root is
        validated against ARCGIS_ROOTS (the first of them is used when there is no XML tree),
        and both self._data_map and self._data_structures are populated.

        :raises InvalidContent: if the root of the XML tree is not in ARCGIS_ROOTS
        """

        if self._data_map is not None:
            return  # Initialization only needs to happen once

        # Resolve and validate the ArcGIS metadata root (defaults to uncapitalized)

        root = ARCGIS_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)

        if root not in ARCGIS_ROOTS:
            raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=root)

        data_map = {'_root': root}
        data_map.update(_agis_tag_formats)

        structures = {}

        # Build the XPATH structures for each of the complex properties

        attr_xpath = data_map[ATTRIBUTES]
        structures[ATTRIBUTES] = format_xpaths(
            _agis_definitions[ATTRIBUTES],
            label=attr_xpath.format(ad_path='attrlabl'),
            aliases=attr_xpath.format(ad_path='attalias'),
            definition=attr_xpath.format(ad_path='attrdef'),
            definition_src=attr_xpath.format(ad_path='attrdefs')
        )

        bbox_xpath = data_map[BOUNDING_BOX]
        structures[BOUNDING_BOX] = format_xpaths(
            _agis_definitions[BOUNDING_BOX],
            east=bbox_xpath.format(bbox_path='eastBL'),
            south=bbox_xpath.format(bbox_path='southBL'),
            west=bbox_xpath.format(bbox_path='westBL'),
            north=bbox_xpath.format(bbox_path='northBL')
        )

        contact_xpath = data_map[CONTACTS]
        structures[CONTACTS] = format_xpaths(
            _agis_definitions[CONTACTS],
            name=contact_xpath.format(ct_path='rpIndName'),
            organization=contact_xpath.format(ct_path='rpOrgName'),
            position=contact_xpath.format(ct_path='rpPosName'),
            email=contact_xpath.format(ct_path='rpCntInfo/cntAddress/eMailAdd')
        )

        # Each date type has an element XPATH, and an underscore-prefixed one for its @date attribute

        date_xpath = data_map[DATES]
        date_paths = (
            (DATE_TYPE_MULTIPLE, 'TM_Instant/tmPosition', 'TM_Instant/tmPosition/@date'),
            (DATE_TYPE_RANGE_BEGIN, 'TM_Period/tmBegin', 'TM_Period/tmBegin/@date'),
            (DATE_TYPE_RANGE_END, 'TM_Period/tmEnd', 'TM_Period/tmEnd/@date'),
            # Same as multiple dates, but will contain only one
            (DATE_TYPE_SINGLE, 'TM_Instant/tmPosition', 'TM_Instant/tmPosition/@date')
        )

        date_structure = {}
        for date_type, element_path, attribute_path in date_paths:
            date_structure[date_type] = date_xpath.format(type_path=element_path)
            date_structure['_' + date_type] = date_xpath.format(type_path=attribute_path)

        # A date range is described by the pair of its begin and end XPATHs
        for prefix in ('', '_'):
            date_structure[prefix + DATE_TYPE_RANGE] = [
                date_structure[prefix + DATE_TYPE_RANGE_BEGIN],
                date_structure[prefix + DATE_TYPE_RANGE_END]
            ]

        structures[DATES] = date_structure

        form_xpath = data_map[DIGITAL_FORMS]
        structures[DIGITAL_FORMS] = format_xpaths(
            _agis_definitions[DIGITAL_FORMS],
            name=form_xpath.format(df_path='formatName'),
            content=form_xpath.format(df_path='formatInfo'),
            decompression=form_xpath.format(df_path='fileDecmTech'),
            version=form_xpath.format(df_path='formatVer'),
            specification=form_xpath.format(df_path='formatSpec'),
            access_desc=data_map['_access_desc'],
            access_instrs=data_map['_access_instrs'],
            network_resource=data_map['_network_resource']
        )

        work_xpath = data_map[LARGER_WORKS]
        structures[LARGER_WORKS] = format_xpaths(
            _agis_definitions[LARGER_WORKS],
            title=work_xpath.format(lw_path='resTitle'),
            edition=work_xpath.format(lw_path='resEd'),
            origin=work_xpath.format(lw_path='citRespParty/rpIndName'),
            online_linkage=work_xpath.format(lw_path='citRespParty/rpCntInfo/cntOnlineRes/linkage'),
            other_citation=work_xpath.format(lw_path='otherCitDet'),
            date=work_xpath.format(lw_path='date/pubDate'),
            place=work_xpath.format(lw_path='citRespParty/rpCntInfo/cntAddress/city'),
            info=work_xpath.format(lw_path='citRespParty/rpOrgName')
        )

        step_xpath = data_map[PROCESS_STEPS]
        structures[PROCESS_STEPS] = format_xpaths(
            _agis_definitions[PROCESS_STEPS],
            description=step_xpath.format(ps_path='stepDesc'),
            date=step_xpath.format(ps_path='stepDateTm'),
            sources=step_xpath.format(ps_path='stepSrc/srcDesc')
        )

        raster_xpath = data_map[RASTER_INFO]
        structures[RASTER_INFO] = format_xpaths(
            _agis_definitions[RASTER_DIMS],
            type=raster_xpath.format(ri_path='@type'),
            size=raster_xpath.format(ri_path='dimSize'),
            value=raster_xpath.format(ri_path='dimResol/value'),
            units=raster_xpath.format(ri_path='dimResol/value/@uom')
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to data map:
        # rules are checked in order, and the first one containing a property applies

        complex_props = (
            ((ATTRIBUTES, CONTACTS, PROCESS_STEPS), self._parse_complex_list, self._update_complex_list),
            ((BOUNDING_BOX, LARGER_WORKS), self._parse_complex, self._update_complex),
            (('attribute_accuracy', 'dataset_completeness'), self._parse_report_item, self._update_report_item),
            ((DATES,), self._parse_dates, self._update_dates),
            ((DIGITAL_FORMS,), self._parse_digital_forms, self._update_digital_forms),
            ((RASTER_INFO,), self._parse_raster_info, self._update_raster_info)
        )

        # Iterate over a copy, given that the data map is reassigned along the way
        for prop, xpath in iteritems(dict(data_map)):
            for props, parser, updater in complex_props:
                if prop in props:
                    data_map[prop] = ParserProperty(parser, updater)
                    break
            else:
                data_map[prop] = xpath  # Simple properties keep their XPATH

        self._data_map = data_map
        self._data_structures = structures
    def _init_data_map(self):
        """ OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the ArcGIS metadata root

        if self._xml_tree is None:
            agis_root = ARCGIS_ROOTS[0]  # Default to uncapitalized
        else:
            agis_root = get_element_name(self._xml_tree)

        if agis_root not in ARCGIS_ROOTS:
            raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=agis_root)

        agis_data_map = {'_root': agis_root}
        agis_data_map.update(_agis_tag_formats)

        agis_data_structures = {}

        # Capture and format complex XPATHs

        ad_format = agis_data_map[ATTRIBUTES]
        agis_data_structures[ATTRIBUTES] = format_xpaths(
            _agis_definitions[ATTRIBUTES],
            label=ad_format.format(ad_path='attrlabl'),
            aliases=ad_format.format(ad_path='attalias'),
            definition=ad_format.format(ad_path='attrdef'),
            definition_src=ad_format.format(ad_path='attrdefs')
        )

        bb_format = agis_data_map[BOUNDING_BOX]
        agis_data_structures[BOUNDING_BOX] = format_xpaths(
            _agis_definitions[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastBL'),
            south=bb_format.format(bbox_path='southBL'),
            west=bb_format.format(bbox_path='westBL'),
            north=bb_format.format(bbox_path='northBL')
        )

        ct_format = agis_data_map[CONTACTS]
        agis_data_structures[CONTACTS] = format_xpaths(
            _agis_definitions[CONTACTS],
            name=ct_format.format(ct_path='rpIndName'),
            organization=ct_format.format(ct_path='rpOrgName'),
            position=ct_format.format(ct_path='rpPosName'),
            email=ct_format.format(ct_path='rpCntInfo/cntAddress/eMailAdd')
        )

        dt_format = agis_data_map[DATES]
        agis_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition'),
            '_' + DATE_TYPE_MULTIPLE: dt_format.format(type_path='TM_Instant/tmPosition/@date'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin'),
            '_' + DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TM_Period/tmBegin/@date'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd'),
            '_' + DATE_TYPE_RANGE_END: dt_format.format(type_path='TM_Period/tmEnd/@date'),
            # Same as multiple dates, but will contain only one
            DATE_TYPE_SINGLE: dt_format.format(type_path='TM_Instant/tmPosition'),
            '_' + DATE_TYPE_SINGLE: dt_format.format(type_path='TM_Instant/tmPosition/@date')
        }
        agis_data_structures[DATES][DATE_TYPE_RANGE] = [
            agis_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            agis_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]
        agis_data_structures[DATES]['_' + DATE_TYPE_RANGE] = [
            agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_BEGIN],
            agis_data_structures[DATES]['_' + DATE_TYPE_RANGE_END]
        ]

        df_format = agis_data_map[DIGITAL_FORMS]
        agis_data_structures[DIGITAL_FORMS] = format_xpaths(
            _agis_definitions[DIGITAL_FORMS],
            name=df_format.format(df_path='formatName'),
            content=df_format.format(df_path='formatInfo'),
            decompression=df_format.format(df_path='fileDecmTech'),
            version=df_format.format(df_path='formatVer'),
            specification=df_format.format(df_path='formatSpec'),
            access_desc=agis_data_map['_access_desc'],
            access_instrs=agis_data_map['_access_instrs'],
            network_resource=agis_data_map['_network_resource']
        )

        lw_format = agis_data_map[LARGER_WORKS]
        agis_data_structures[LARGER_WORKS] = format_xpaths(
            _agis_definitions[LARGER_WORKS],
            title=lw_format.format(lw_path='resTitle'),
            edition=lw_format.format(lw_path='resEd'),
            origin=lw_format.format(lw_path='citRespParty/rpIndName'),
            online_linkage=lw_format.format(lw_path='citRespParty/rpCntInfo/cntOnlineRes/linkage'),
            other_citation=lw_format.format(lw_path='otherCitDet'),
            date=lw_format.format(lw_path='date/pubDate'),
            place=lw_format.format(lw_path='citRespParty/rpCntInfo/cntAddress/city'),
            info=lw_format.format(lw_path='citRespParty/rpOrgName')
        )

        ps_format = agis_data_map[PROCESS_STEPS]
        agis_data_structures[PROCESS_STEPS] = format_xpaths(
            _agis_definitions[PROCESS_STEPS],
            description=ps_format.format(ps_path='stepDesc'),
            date=ps_format.format(ps_path='stepDateTm'),
            sources=ps_format.format(ps_path='stepSrc/srcDesc')
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to data map

        for prop, xpath in iteritems(dict(agis_data_map)):
            if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS):
                agis_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                agis_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop in ('attribute_accuracy', 'dataset_completeness'):
                agis_data_map[prop] = ParserProperty(self._parse_report_item, self._update_report_item)

            elif prop == DATES:
                agis_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            elif prop == DIGITAL_FORMS:
                agis_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms)

            else:
                agis_data_map[prop] = xpath

        self._data_map = agis_data_map
        self._data_structures = agis_data_structures
Пример #7
0
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions

        Populates two instance attributes:
            - self._data_map: maps each property in _fgdc_tag_formats either to its XPATH,
              or to a ParserProperty wrapping the parse/update methods for complex properties
            - self._data_structures: maps each complex property to its formatted XPATH structure

        Does nothing when self._data_map has already been assigned.

        :raises InvalidContent: if the root of the parsed XML tree is not the FGDC root
        """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the FGDC metadata root

        # Without a parsed tree there is nothing to validate: fall back to the expected root
        fgdc_root = FGDC_ROOT if self._xml_tree is None else get_element_name(self._xml_tree)

        if fgdc_root != FGDC_ROOT:
            # This parser validates against FGDC_ROOT, so the message must name FGDC (not ISO-19115)
            raise InvalidContent('Invalid XML root for FGDC standard: {root}', root=fgdc_root)

        fgdc_data_map = {'_root': FGDC_ROOT}
        fgdc_data_structures = {}

        # Capture and format other complex XPATHs

        ad_format = _fgdc_tag_formats[ATTRIBUTES]
        fgdc_data_structures[ATTRIBUTES] = format_xpaths(
            _fgdc_definitions[ATTRIBUTES],
            label=ad_format.format(ad_path='attrlabl'),
            aliases=ad_format.format(ad_path='attalias'),
            definition=ad_format.format(ad_path='attrdef'),
            definition_src=ad_format.format(ad_path='attrdefs')
        )

        bb_format = _fgdc_tag_formats[BOUNDING_BOX]
        fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
            _fgdc_definitions[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastbc'),
            south=bb_format.format(bbox_path='southbc'),
            west=bb_format.format(bbox_path='westbc'),
            north=bb_format.format(bbox_path='northbc')
        )

        ct_format = _fgdc_tag_formats[CONTACTS]
        fgdc_data_structures[CONTACTS] = format_xpaths(
            _fgdc_definitions[CONTACTS],

            # Underscore-prefixed keys hold the alternate location for the same value
            name=ct_format.format(ct_path='cntperp/cntper'),
            _name=ct_format.format(ct_path='cntorgp/cntper'),  # If not in cntperp

            organization=ct_format.format(ct_path='cntperp/cntorg'),
            _organization=ct_format.format(ct_path='cntorgp/cntorg'),  # If not in cntperp

            position=ct_format.format(ct_path='cntpos'),
            email=ct_format.format(ct_path='cntemail')
        )

        dt_format = _fgdc_tag_formats[DATES]
        fgdc_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
            DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
        }
        # A date range is represented by its begin and end XPATHs, in that order
        fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]

        df_format = _fgdc_tag_formats[DIGITAL_FORMS]
        fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
            _fgdc_definitions[DIGITAL_FORMS],
            name=df_format.format(df_path='digtinfo/formname'),
            content=df_format.format(df_path='digtinfo/formcont'),
            decompression=df_format.format(df_path='digtinfo/filedec'),
            version=df_format.format(df_path='digtinfo/formvern'),
            specification=df_format.format(df_path='digtinfo/formspec'),
            access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
            access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
            network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
        )

        lw_format = _fgdc_tag_formats[LARGER_WORKS]
        fgdc_data_structures[LARGER_WORKS] = format_xpaths(
            _fgdc_definitions[LARGER_WORKS],
            title=lw_format.format(lw_path='title'),
            edition=lw_format.format(lw_path='edition'),
            origin=lw_format.format(lw_path='origin'),
            online_linkage=lw_format.format(lw_path='onlink'),
            other_citation=lw_format.format(lw_path='othercit'),
            date=lw_format.format(lw_path='pubdate'),
            place=lw_format.format(lw_path='pubinfo/pubplace'),
            info=lw_format.format(lw_path='pubinfo/publish')
        )

        ps_format = _fgdc_tag_formats[PROCESS_STEPS]
        fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
            _fgdc_definitions[PROCESS_STEPS],
            description=ps_format.format(ps_path='procdesc'),
            date=ps_format.format(ps_path='procdate'),
            sources=ps_format.format(ps_path='srcused')
        )

        ri_format = _fgdc_tag_formats[RASTER_INFO]
        fgdc_data_structures[RASTER_INFO] = format_xpaths(
            _fgdc_definitions[RASTER_INFO],

            dimensions=ri_format.format(ri_path='rasttype'),
            row_count=ri_format.format(ri_path='rowcount'),
            column_count=ri_format.format(ri_path='colcount'),
            vertical_count=ri_format.format(ri_path='vrtcount'),

            # Resolution XPATHs are built from two separate base paths in _fgdc_tag_formats
            x_resolution=_fgdc_tag_formats['_raster_resolution'] + '/absres',
            _x_resolution=_fgdc_tag_formats['__raster_resolution'] + '/longres',
            y_resolution=_fgdc_tag_formats['_raster_resolution'] + '/ordres',
            _y_resolution=_fgdc_tag_formats['__raster_resolution'] + '/latres',
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map

        # Iterate over a plain dict copy so the module-level formats are never touched
        fgdc_data_formats = dict(_fgdc_tag_formats)

        for prop, xpath in iteritems(fgdc_data_formats):
            if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop == DATES:
                fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            elif prop == RASTER_INFO:
                # Raster info is parsed like any complex struct, but has its own updater
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_raster_info)

            else:
                # Every remaining property maps directly to its XPATH
                fgdc_data_map[prop] = xpath

        self._data_map = fgdc_data_map
        self._data_structures = fgdc_data_structures