def get_parsed_content(metadata_content):
    """
    Produces a validated XML tree from any of the following types of content:
        1. XML string or file object: the XML content is parsed
        2. MetadataParser instance: its xml_tree is deep copied
        3. Dictionary with nested objects containing:
            - name (required): the name of the element tag
            - text: the text contained by element
            - tail: text immediately following the element
            - attributes: a Dictionary containing element attributes
            - children: a List of converted child elements

    :raises InvalidContent: if no XML tree could be produced, or the root element is not in VALID_ROOTS
    :raises NoContent: if the content passed in is None, or the parsed tree has no root element name
    :return: a tuple with the XML root name, and the XML Tree it was read from
    """

    _import_parsers()  # Prevents circular dependencies between modules

    if metadata_content is None:
        raise NoContent('Metadata has no data')

    if isinstance(metadata_content, MetadataParser):
        parsed_tree = deepcopy(metadata_content._xml_tree)
    elif isinstance(metadata_content, dict):
        parsed_tree = get_element_tree(metadata_content)
    else:
        try:
            # Strip name spaces from file or XML content
            parsed_tree = get_element_tree(metadata_content)
        except Exception:
            parsed_tree = None  # Several exceptions possible, outcome is the same

    if parsed_tree is None:
        raise InvalidContent(
            'Cannot instantiate a {parser_type} parser with invalid content to parse',
            parser_type=type(metadata_content).__name__
        )

    root_name = get_element_name(parsed_tree)

    if root_name is None:
        raise NoContent('Metadata contains no data')

    if root_name not in VALID_ROOTS:
        content_type = type(metadata_content).__name__
        raise InvalidContent(
            'Invalid root element for {content}: {xml_root}', content=content_type, xml_root=root_name
        )

    return root_name, parsed_tree
def _init_data_map(self):
    """
    OVERRIDDEN: Build the ISO-19115 data map of XPATHS and specialized functions on first use

    Assigns self._data_map and self._data_structures. When self._data_map has
    already been assigned, the call returns immediately without rebuilding anything.

    :raises InvalidContent: if the XML root element name is not one of ISO_ROOTS
    """

    if self._data_map is not None:
        return  # Already initialized

    # Resolve and validate the ISO metadata root: default to the first root without a tree
    root_name = ISO_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)

    if root_name not in ISO_ROOTS:
        raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=root_name)

    data_map = {'_root': root_name}
    for tag_xpaths in (_iso_tag_roots, _iso_tag_formats):
        data_map.update(tag_xpaths)

    structures = {}

    # Attribute details

    attr_xpath = data_map[ATTRIBUTES]
    attr_source = data_map['_attr_src']
    type_source = attr_source.replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

    structures[ATTRIBUTES] = format_xpaths(
        _iso_definitions[ATTRIBUTES],
        label=attr_xpath.format(ad_path='memberName/LocalName'),
        aliases=attr_xpath.format(ad_path='aliases/LocalName'),  # Not in spec
        definition=attr_xpath.format(ad_path='definition/CharacterString'),

        # The definition source is first looked for under FC_FeatureAttribute
        definition_src=attr_source + '/organisationName/CharacterString',
        _definition_src=attr_source + '/individualName/CharacterString',

        # Otherwise the FC_FeatureType source is assumed to apply to the attribute
        __definition_src=type_source + '/organisationName/CharacterString',
        ___definition_src=type_source + '/individualName/CharacterString'
    )

    # Bounding box

    bbox_xpath = data_map[BOUNDING_BOX]
    structures[BOUNDING_BOX] = format_xpaths(
        _iso_definitions[BOUNDING_BOX],
        east=bbox_xpath.format(bbox_path='eastBoundLongitude/Decimal'),
        south=bbox_xpath.format(bbox_path='southBoundLatitude/Decimal'),
        west=bbox_xpath.format(bbox_path='westBoundLongitude/Decimal'),
        north=bbox_xpath.format(bbox_path='northBoundLatitude/Decimal')
    )

    # Contacts

    contact_xpath = data_map[CONTACTS]
    structures[CONTACTS] = format_xpaths(
        _iso_definitions[CONTACTS],
        name=contact_xpath.format(ct_path='individualName/CharacterString'),
        organization=contact_xpath.format(ct_path='organisationName/CharacterString'),
        position=contact_xpath.format(ct_path='positionName/CharacterString'),
        email=contact_xpath.format(
            ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
        )
    )

    # Dates: single and multiple share one XPATH, and the range pairs begin with end

    date_xpath = data_map[DATES]
    instant_xpath = date_xpath.format(type_path='TimeInstant/timePosition')
    begin_xpath = date_xpath.format(type_path='TimePeriod/begin/TimeInstant/timePosition')
    end_xpath = date_xpath.format(type_path='TimePeriod/end/TimeInstant/timePosition')

    structures[DATES] = {
        DATE_TYPE_MULTIPLE: instant_xpath,
        DATE_TYPE_RANGE_BEGIN: begin_xpath,
        DATE_TYPE_RANGE_END: end_xpath,
        DATE_TYPE_SINGLE: instant_xpath,
        DATE_TYPE_RANGE: [begin_xpath, end_xpath]
    }

    # Digital forms

    form_xpath = data_map[DIGITAL_FORMS]
    structures[DIGITAL_FORMS] = format_xpaths(
        _iso_definitions[DIGITAL_FORMS],
        name=form_xpath.format(df_path='name/CharacterString'),
        content='',  # Not supported in ISO-19115 (appending to spec)
        decompression=form_xpath.format(df_path='fileDecompressionTechnique/CharacterString'),
        version=form_xpath.format(df_path='version/CharacterString'),
        specification=form_xpath.format(df_path='specification/CharacterString'),
        access_desc=data_map['_access_desc'],
        access_instrs=data_map['_access_instrs'],
        network_resource=data_map['_network_resource']
    )

    # Keywords: every keyword property receives its own copy of the same XPATHs

    keyword_xpaths = {
        'keyword_root': 'MD_Keywords/keyword',
        'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
        'keyword': 'MD_Keywords/keyword/CharacterString'
    }
    structures.update((keyword_prop, deepcopy(keyword_xpaths)) for keyword_prop in KEYWORD_PROPS)

    # Larger works

    work_xpath = data_map[LARGER_WORKS]
    work_citation = data_map['_lw_citation']

    structures[LARGER_WORKS] = format_xpaths(
        _iso_definitions[LARGER_WORKS],
        title=work_xpath.format(lw_path='title/CharacterString'),
        edition=work_xpath.format(lw_path='edition/CharacterString'),
        origin=work_citation.format(lw_path='individualName/CharacterString'),
        online_linkage=data_map['_lw_linkage'].format(lw_path='linkage/URL'),
        other_citation=work_xpath.format(lw_path='otherCitationDetails/CharacterString'),
        date=work_xpath.format(lw_path='editionDate/Date'),
        place=data_map['_lw_contact'].format(lw_path='address/CI_Address/city/CharacterString'),
        info=work_citation.format(lw_path='organisationName/CharacterString')
    )

    # Process steps

    step_xpath = data_map[PROCESS_STEPS]
    structures[PROCESS_STEPS] = format_xpaths(
        _iso_definitions[PROCESS_STEPS],
        description=step_xpath.format(ps_path='description/CharacterString'),
        date=step_xpath.format(ps_path='dateTime/DateTime'),
        sources=step_xpath.format(
            ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
        )
    )

    # Raster info

    raster_xpath = data_map[RASTER_INFO]
    structures[RASTER_INFO] = format_xpaths(
        _iso_definitions[RASTER_DIMS],
        type=raster_xpath.format(ri_path='dimensionName/MD_DimensionNameTypeCode'),
        _type=raster_xpath.format(ri_path='dimensionName/MD_DimensionNameTypeCode/@codeListValue'),
        size=raster_xpath.format(ri_path='dimensionSize/Integer'),
        value=raster_xpath.format(ri_path='resolution/Measure'),
        units=raster_xpath.format(ri_path='resolution/Measure/@uom')
    )

    def as_property(prop, xpath):
        """ :return: a ParserProperty for a complex prop, otherwise the xpath as it was given """

        if prop == ATTRIBUTES:
            return ParserProperty(self._parse_attribute_details, self._update_attribute_details)
        if prop in (CONTACTS, PROCESS_STEPS):
            return ParserProperty(self._parse_complex_list, self._update_complex_list)
        if prop in (BOUNDING_BOX, LARGER_WORKS):
            return ParserProperty(self._parse_complex, self._update_complex)
        if prop == DATES:
            return ParserProperty(self._parse_dates, self._update_dates)
        if prop == DIGITAL_FORMS:
            return ParserProperty(self._parse_digital_forms, self._update_digital_forms)
        if prop in KEYWORD_PROPS:
            return ParserProperty(self._parse_keywords, self._update_keywords)
        if prop == RASTER_INFO:
            return ParserProperty(self._parse_raster_info, self._update_raster_info)

        return xpath

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map (iterating over a copy)
    for prop, xpath in iteritems(dict(data_map)):
        data_map[prop] = as_property(prop, xpath)

    self._data_map = data_map
    self._data_structures = structures
def _init_data_map(self):
    """
    OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions

    Runs once: when self._data_map has already been assigned this returns immediately.
    Otherwise the XML root is validated, the complex XPATH structures are formatted from
    _fgdc_definitions and _fgdc_tag_formats, and the results are assigned to
    self._data_map and self._data_structures.

    :raises InvalidContent: if the XML root element name is not FGDC_ROOT
    """

    if self._data_map is not None:
        return  # Initiation happens once

    # Parse and validate the FGDC metadata root

    if self._xml_tree is None:
        fgdc_root = FGDC_ROOT
    else:
        fgdc_root = get_element_name(self._xml_tree)

    if fgdc_root != FGDC_ROOT:
        # Name the standard actually being validated so the error is not misleading
        raise InvalidContent('Invalid XML root for FGDC metadata: {root}', root=fgdc_root)

    fgdc_data_map = {'_root': FGDC_ROOT}
    fgdc_data_structures = {}

    # Capture and format other complex XPATHs

    ad_format = _fgdc_tag_formats[ATTRIBUTES]
    fgdc_data_structures[ATTRIBUTES] = format_xpaths(
        _fgdc_definitions[ATTRIBUTES],
        label=ad_format.format(ad_path='attrlabl'),
        aliases=ad_format.format(ad_path='attalias'),
        definition=ad_format.format(ad_path='attrdef'),
        definition_src=ad_format.format(ad_path='attrdefs')
    )

    bb_format = _fgdc_tag_formats[BOUNDING_BOX]
    fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
        _fgdc_definitions[BOUNDING_BOX],
        east=bb_format.format(bbox_path='eastbc'),
        south=bb_format.format(bbox_path='southbc'),
        west=bb_format.format(bbox_path='westbc'),
        north=bb_format.format(bbox_path='northbc')
    )

    ct_format = _fgdc_tag_formats[CONTACTS]
    fgdc_data_structures[CONTACTS] = format_xpaths(
        _fgdc_definitions[CONTACTS],

        name=ct_format.format(ct_path='cntperp/cntper'),
        _name=ct_format.format(ct_path='cntorgp/cntper'),  # If not in cntperp

        organization=ct_format.format(ct_path='cntperp/cntorg'),
        _organization=ct_format.format(ct_path='cntorgp/cntorg'),  # If not in cntperp

        position=ct_format.format(ct_path='cntpos'),
        email=ct_format.format(ct_path='cntemail')
    )

    dt_format = _fgdc_tag_formats[DATES]
    fgdc_data_structures[DATES] = {
        DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
        DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
        DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
        DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
    }

    # A date range is described by the pair of begin and end XPATHs
    fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
    ]

    df_format = _fgdc_tag_formats[DIGITAL_FORMS]
    fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
        _fgdc_definitions[DIGITAL_FORMS],
        name=df_format.format(df_path='digtinfo/formname'),
        content=df_format.format(df_path='digtinfo/formcont'),
        decompression=df_format.format(df_path='digtinfo/filedec'),
        version=df_format.format(df_path='digtinfo/formvern'),
        specification=df_format.format(df_path='digtinfo/formspec'),
        access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
        access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
        network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
    )

    lw_format = _fgdc_tag_formats[LARGER_WORKS]
    fgdc_data_structures[LARGER_WORKS] = format_xpaths(
        _fgdc_definitions[LARGER_WORKS],
        title=lw_format.format(lw_path='title'),
        edition=lw_format.format(lw_path='edition'),
        origin=lw_format.format(lw_path='origin'),
        online_linkage=lw_format.format(lw_path='onlink'),
        other_citation=lw_format.format(lw_path='othercit'),
        date=lw_format.format(lw_path='pubdate'),
        place=lw_format.format(lw_path='pubinfo/pubplace'),
        info=lw_format.format(lw_path='pubinfo/publish')
    )

    ps_format = _fgdc_tag_formats[PROCESS_STEPS]
    fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
        _fgdc_definitions[PROCESS_STEPS],
        description=ps_format.format(ps_path='procdesc'),
        date=ps_format.format(ps_path='procdate'),
        sources=ps_format.format(ps_path='srcused')
    )

    # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map:
    # complex props are handled by parse / update methods, all others keep their XPATH

    fgdc_data_formats = dict(_fgdc_tag_formats)

    for prop, xpath in iteritems(fgdc_data_formats):
        if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

        elif prop in (BOUNDING_BOX, LARGER_WORKS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

        elif prop == DATES:
            fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

        else:
            fgdc_data_map[prop] = xpath

    self._data_map = fgdc_data_map
    self._data_structures = fgdc_data_structures
def _init_data_map(self):
    """
    OVERRIDDEN: Initialize required ISO-19115 data map with XPATHS and specialized functions

    Runs once: when self._data_map has already been assigned this returns immediately.
    Otherwise the XML root is validated, the complex XPATH structures are formatted from
    _iso_definitions and the tag formats, and the results are assigned to
    self._data_map and self._data_structures.

    :raises InvalidContent: if the XML root element name is not one of ISO_ROOTS
    """

    if self._data_map is not None:
        return  # Initiation happens once

    # Parse and validate the ISO metadata root

    if self._xml_tree is None:
        iso_root = ISO_ROOTS[0]
    else:
        iso_root = get_element_name(self._xml_tree)

    if iso_root not in ISO_ROOTS:
        raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=iso_root)

    iso_data_map = {'_root': iso_root}
    iso_data_map.update(_iso_tag_roots)
    iso_data_map.update(_iso_tag_formats)

    iso_data_structures = {}

    # Capture and format complex XPATHs

    ad_format = iso_data_map[ATTRIBUTES]
    ft_source = iso_data_map['_attr_src'].replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

    iso_data_structures[ATTRIBUTES] = format_xpaths(
        _iso_definitions[ATTRIBUTES],
        label=ad_format.format(ad_path='memberName/LocalName'),
        aliases=ad_format.format(ad_path='aliases/LocalName'),  # Not in spec
        definition=ad_format.format(ad_path='definition/CharacterString'),

        # First try to populate attribute definition source from FC_FeatureAttribute
        definition_src=iso_data_map['_attr_src'] + '/organisationName/CharacterString',
        _definition_src=iso_data_map['_attr_src'] + '/individualName/CharacterString',

        # Then assume feature type source is the same as attribute: populate from FC_FeatureType
        __definition_src=ft_source + '/organisationName/CharacterString',
        ___definition_src=ft_source + '/individualName/CharacterString'
    )

    bb_format = iso_data_map[BOUNDING_BOX]
    iso_data_structures[BOUNDING_BOX] = format_xpaths(
        _iso_definitions[BOUNDING_BOX],
        east=bb_format.format(bbox_path='eastBoundLongitude/Decimal'),
        south=bb_format.format(bbox_path='southBoundLatitude/Decimal'),
        west=bb_format.format(bbox_path='westBoundLongitude/Decimal'),
        north=bb_format.format(bbox_path='northBoundLatitude/Decimal')
    )

    ct_format = iso_data_map[CONTACTS]
    iso_data_structures[CONTACTS] = format_xpaths(
        _iso_definitions[CONTACTS],
        name=ct_format.format(ct_path='individualName/CharacterString'),
        organization=ct_format.format(ct_path='organisationName/CharacterString'),
        position=ct_format.format(ct_path='positionName/CharacterString'),
        email=ct_format.format(
            ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
        )
    )

    dt_format = iso_data_map[DATES]
    iso_data_structures[DATES] = {
        DATE_TYPE_MULTIPLE: dt_format.format(type_path='TimeInstant/timePosition'),
        DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TimePeriod/begin/TimeInstant/timePosition'),
        DATE_TYPE_RANGE_END: dt_format.format(type_path='TimePeriod/end/TimeInstant/timePosition'),
        DATE_TYPE_SINGLE: dt_format.format(type_path='TimeInstant/timePosition')  # Same as multiple
    }

    # A date range is described by the pair of begin and end XPATHs
    iso_data_structures[DATES][DATE_TYPE_RANGE] = [
        iso_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
        iso_data_structures[DATES][DATE_TYPE_RANGE_END]
    ]

    df_format = iso_data_map[DIGITAL_FORMS]
    iso_data_structures[DIGITAL_FORMS] = format_xpaths(
        _iso_definitions[DIGITAL_FORMS],
        name=df_format.format(df_path='name/CharacterString'),
        content='',  # Not supported in ISO-19115 (appending to spec)
        decompression=df_format.format(df_path='fileDecompressionTechnique/CharacterString'),
        version=df_format.format(df_path='version/CharacterString'),
        specification=df_format.format(df_path='specification/CharacterString'),
        access_desc=iso_data_map['_access_desc'],
        access_instrs=iso_data_map['_access_instrs'],
        network_resource=iso_data_map['_network_resource']
    )

    keywords_structure = {
        'keyword_root': 'MD_Keywords/keyword',
        'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
        'keyword': 'MD_Keywords/keyword/CharacterString'
    }

    # Each keyword property gets its own dict, so changing one structure cannot alter the
    # other (a shallow copy is enough because every value is an immutable string)
    iso_data_structures[KEYWORDS_PLACE] = dict(keywords_structure)
    iso_data_structures[KEYWORDS_THEME] = dict(keywords_structure)

    lw_format = iso_data_map[LARGER_WORKS]
    iso_data_structures[LARGER_WORKS] = format_xpaths(
        _iso_definitions[LARGER_WORKS],
        title=lw_format.format(lw_path='title/CharacterString'),
        edition=lw_format.format(lw_path='edition/CharacterString'),
        origin=iso_data_map['_lw_citation'].format(lw_path='individualName/CharacterString'),
        online_linkage=iso_data_map['_lw_linkage'].format(lw_path='linkage/URL'),
        other_citation=lw_format.format(lw_path='otherCitationDetails/CharacterString'),
        date=lw_format.format(lw_path='editionDate/Date'),
        place=iso_data_map['_lw_contact'].format(lw_path='address/CI_Address/city/CharacterString'),
        info=iso_data_map['_lw_citation'].format(lw_path='organisationName/CharacterString')
    )

    ps_format = iso_data_map[PROCESS_STEPS]
    iso_data_structures[PROCESS_STEPS] = format_xpaths(
        _iso_definitions[PROCESS_STEPS],
        description=ps_format.format(ps_path='description/CharacterString'),
        date=ps_format.format(ps_path='dateTime/DateTime'),
        sources=ps_format.format(
            ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
        )
    )

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map:
    # a copy is iterated because the data map itself is reassigned inside the loop

    for prop, xpath in iteritems(dict(iso_data_map)):
        if prop == ATTRIBUTES:
            iso_data_map[prop] = ParserProperty(self._parse_attribute_details, self._update_attribute_details)

        elif prop in (CONTACTS, PROCESS_STEPS):
            iso_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

        elif prop in (BOUNDING_BOX, LARGER_WORKS):
            iso_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

        elif prop == DATES:
            iso_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

        elif prop == DIGITAL_FORMS:
            iso_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms)

        elif prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
            iso_data_map[prop] = ParserProperty(self._parse_keywords, self._update_keywords)

        else:
            iso_data_map[prop] = xpath

    self._data_map = iso_data_map
    self._data_structures = iso_data_structures
def _init_data_map(self):
    """
    OVERRIDDEN: Build the ArcGIS data map of XPATHS and specialized functions on first use

    Assigns self._data_map and self._data_structures. When self._data_map has
    already been assigned, the call returns immediately without rebuilding anything.

    :raises InvalidContent: if the XML root element name is not one of ARCGIS_ROOTS
    """

    if self._data_map is not None:
        return  # Already initialized

    # Resolve and validate the ArcGIS metadata root (default to uncapitalized)
    root_name = ARCGIS_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)

    if root_name not in ARCGIS_ROOTS:
        raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=root_name)

    data_map = {'_root': root_name}
    data_map.update(_agis_tag_formats)

    structures = {}

    # Attribute details

    attr_xpath = data_map[ATTRIBUTES]
    structures[ATTRIBUTES] = format_xpaths(
        _agis_definitions[ATTRIBUTES],
        label=attr_xpath.format(ad_path='attrlabl'),
        aliases=attr_xpath.format(ad_path='attalias'),
        definition=attr_xpath.format(ad_path='attrdef'),
        definition_src=attr_xpath.format(ad_path='attrdefs')
    )

    # Bounding box

    bbox_xpath = data_map[BOUNDING_BOX]
    structures[BOUNDING_BOX] = format_xpaths(
        _agis_definitions[BOUNDING_BOX],
        east=bbox_xpath.format(bbox_path='eastBL'),
        south=bbox_xpath.format(bbox_path='southBL'),
        west=bbox_xpath.format(bbox_path='westBL'),
        north=bbox_xpath.format(bbox_path='northBL')
    )

    # Contacts

    contact_xpath = data_map[CONTACTS]
    structures[CONTACTS] = format_xpaths(
        _agis_definitions[CONTACTS],
        name=contact_xpath.format(ct_path='rpIndName'),
        organization=contact_xpath.format(ct_path='rpOrgName'),
        position=contact_xpath.format(ct_path='rpPosName'),
        email=contact_xpath.format(ct_path='rpCntInfo/cntAddress/eMailAdd')
    )

    # Dates: each date type has an element XPATH, and a '_' prefixed XPATH to its @date attribute

    date_xpath = data_map[DATES]
    date_paths = (
        (DATE_TYPE_MULTIPLE, 'TM_Instant/tmPosition'),
        (DATE_TYPE_RANGE_BEGIN, 'TM_Period/tmBegin'),
        (DATE_TYPE_RANGE_END, 'TM_Period/tmEnd'),
        (DATE_TYPE_SINGLE, 'TM_Instant/tmPosition')  # Same as multiple dates, but will contain only one
    )

    date_xpaths = {}
    for date_type, type_path in date_paths:
        date_xpaths[date_type] = date_xpath.format(type_path=type_path)
        date_xpaths['_' + date_type] = date_xpath.format(type_path=type_path + '/@date')

    # The range pairs begin with end, once for the elements and once for the attributes
    for prefix in ('', '_'):
        date_xpaths[prefix + DATE_TYPE_RANGE] = [
            date_xpaths[prefix + DATE_TYPE_RANGE_BEGIN],
            date_xpaths[prefix + DATE_TYPE_RANGE_END]
        ]

    structures[DATES] = date_xpaths

    # Digital forms

    form_xpath = data_map[DIGITAL_FORMS]
    structures[DIGITAL_FORMS] = format_xpaths(
        _agis_definitions[DIGITAL_FORMS],
        name=form_xpath.format(df_path='formatName'),
        content=form_xpath.format(df_path='formatInfo'),
        decompression=form_xpath.format(df_path='fileDecmTech'),
        version=form_xpath.format(df_path='formatVer'),
        specification=form_xpath.format(df_path='formatSpec'),
        access_desc=data_map['_access_desc'],
        access_instrs=data_map['_access_instrs'],
        network_resource=data_map['_network_resource']
    )

    # Larger works

    work_xpath = data_map[LARGER_WORKS]
    structures[LARGER_WORKS] = format_xpaths(
        _agis_definitions[LARGER_WORKS],
        title=work_xpath.format(lw_path='resTitle'),
        edition=work_xpath.format(lw_path='resEd'),
        origin=work_xpath.format(lw_path='citRespParty/rpIndName'),
        online_linkage=work_xpath.format(lw_path='citRespParty/rpCntInfo/cntOnlineRes/linkage'),
        other_citation=work_xpath.format(lw_path='otherCitDet'),
        date=work_xpath.format(lw_path='date/pubDate'),
        place=work_xpath.format(lw_path='citRespParty/rpCntInfo/cntAddress/city'),
        info=work_xpath.format(lw_path='citRespParty/rpOrgName')
    )

    # Process steps

    step_xpath = data_map[PROCESS_STEPS]
    structures[PROCESS_STEPS] = format_xpaths(
        _agis_definitions[PROCESS_STEPS],
        description=step_xpath.format(ps_path='stepDesc'),
        date=step_xpath.format(ps_path='stepDateTm'),
        sources=step_xpath.format(ps_path='stepSrc/srcDesc')
    )

    # Raster info

    raster_xpath = data_map[RASTER_INFO]
    structures[RASTER_INFO] = format_xpaths(
        _agis_definitions[RASTER_DIMS],
        type=raster_xpath.format(ri_path='@type'),
        size=raster_xpath.format(ri_path='dimSize'),
        value=raster_xpath.format(ri_path='dimResol/value'),
        units=raster_xpath.format(ri_path='dimResol/value/@uom')
    )

    def as_property(prop, xpath):
        """ :return: a ParserProperty for a complex prop, otherwise the xpath as it was given """

        if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS):
            return ParserProperty(self._parse_complex_list, self._update_complex_list)
        if prop in (BOUNDING_BOX, LARGER_WORKS):
            return ParserProperty(self._parse_complex, self._update_complex)
        if prop in ('attribute_accuracy', 'dataset_completeness'):
            return ParserProperty(self._parse_report_item, self._update_report_item)
        if prop == DATES:
            return ParserProperty(self._parse_dates, self._update_dates)
        if prop == DIGITAL_FORMS:
            return ParserProperty(self._parse_digital_forms, self._update_digital_forms)
        if prop == RASTER_INFO:
            return ParserProperty(self._parse_raster_info, self._update_raster_info)

        return xpath

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map (iterating over a copy)
    for prop, xpath in iteritems(dict(data_map)):
        data_map[prop] = as_property(prop, xpath)

    self._data_map = data_map
    self._data_structures = structures
def _init_data_map(self):
    """
    OVERRIDDEN: Build the ArcGIS data map of XPATHS and specialized functions on first use

    Assigns self._data_map and self._data_structures. When self._data_map has
    already been assigned, the call returns immediately without rebuilding anything.

    :raises InvalidContent: if the XML root element name is not one of ARCGIS_ROOTS
    """

    if self._data_map is not None:
        return  # Already initialized

    # Resolve and validate the ArcGIS metadata root (default to uncapitalized)
    root_name = ARCGIS_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)

    if root_name not in ARCGIS_ROOTS:
        raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=root_name)

    xpath_map = {'_root': root_name}
    xpath_map.update(_agis_tag_formats)

    structures = {}

    # Attribute details

    attr_xpath = xpath_map[ATTRIBUTES]
    structures[ATTRIBUTES] = format_xpaths(
        _agis_definitions[ATTRIBUTES],
        label=attr_xpath.format(ad_path='attrlabl'),
        aliases=attr_xpath.format(ad_path='attalias'),
        definition=attr_xpath.format(ad_path='attrdef'),
        definition_src=attr_xpath.format(ad_path='attrdefs')
    )

    # Bounding box

    bbox_xpath = xpath_map[BOUNDING_BOX]
    structures[BOUNDING_BOX] = format_xpaths(
        _agis_definitions[BOUNDING_BOX],
        east=bbox_xpath.format(bbox_path='eastBL'),
        south=bbox_xpath.format(bbox_path='southBL'),
        west=bbox_xpath.format(bbox_path='westBL'),
        north=bbox_xpath.format(bbox_path='northBL')
    )

    # Contacts

    contact_xpath = xpath_map[CONTACTS]
    structures[CONTACTS] = format_xpaths(
        _agis_definitions[CONTACTS],
        name=contact_xpath.format(ct_path='rpIndName'),
        organization=contact_xpath.format(ct_path='rpOrgName'),
        position=contact_xpath.format(ct_path='rpPosName'),
        email=contact_xpath.format(ct_path='rpCntInfo/cntAddress/eMailAdd')
    )

    # Dates: every element XPATH is paired with a '_' prefixed XPATH to its @date attribute

    date_xpath = xpath_map[DATES]

    instant = date_xpath.format(type_path='TM_Instant/tmPosition')
    instant_attr = date_xpath.format(type_path='TM_Instant/tmPosition/@date')
    begin = date_xpath.format(type_path='TM_Period/tmBegin')
    begin_attr = date_xpath.format(type_path='TM_Period/tmBegin/@date')
    end = date_xpath.format(type_path='TM_Period/tmEnd')
    end_attr = date_xpath.format(type_path='TM_Period/tmEnd/@date')

    structures[DATES] = {
        DATE_TYPE_MULTIPLE: instant,
        '_' + DATE_TYPE_MULTIPLE: instant_attr,
        DATE_TYPE_RANGE_BEGIN: begin,
        '_' + DATE_TYPE_RANGE_BEGIN: begin_attr,
        DATE_TYPE_RANGE_END: end,
        '_' + DATE_TYPE_RANGE_END: end_attr,

        # Same as multiple dates, but will contain only one
        DATE_TYPE_SINGLE: instant,
        '_' + DATE_TYPE_SINGLE: instant_attr,

        # The range pairs begin with end
        DATE_TYPE_RANGE: [begin, end],
        '_' + DATE_TYPE_RANGE: [begin_attr, end_attr]
    }

    # Digital forms

    form_xpath = xpath_map[DIGITAL_FORMS]
    structures[DIGITAL_FORMS] = format_xpaths(
        _agis_definitions[DIGITAL_FORMS],
        name=form_xpath.format(df_path='formatName'),
        content=form_xpath.format(df_path='formatInfo'),
        decompression=form_xpath.format(df_path='fileDecmTech'),
        version=form_xpath.format(df_path='formatVer'),
        specification=form_xpath.format(df_path='formatSpec'),
        access_desc=xpath_map['_access_desc'],
        access_instrs=xpath_map['_access_instrs'],
        network_resource=xpath_map['_network_resource']
    )

    # Larger works

    work_xpath = xpath_map[LARGER_WORKS]
    structures[LARGER_WORKS] = format_xpaths(
        _agis_definitions[LARGER_WORKS],
        title=work_xpath.format(lw_path='resTitle'),
        edition=work_xpath.format(lw_path='resEd'),
        origin=work_xpath.format(lw_path='citRespParty/rpIndName'),
        online_linkage=work_xpath.format(lw_path='citRespParty/rpCntInfo/cntOnlineRes/linkage'),
        other_citation=work_xpath.format(lw_path='otherCitDet'),
        date=work_xpath.format(lw_path='date/pubDate'),
        place=work_xpath.format(lw_path='citRespParty/rpCntInfo/cntAddress/city'),
        info=work_xpath.format(lw_path='citRespParty/rpOrgName')
    )

    # Process steps

    step_xpath = xpath_map[PROCESS_STEPS]
    structures[PROCESS_STEPS] = format_xpaths(
        _agis_definitions[PROCESS_STEPS],
        description=step_xpath.format(ps_path='stepDesc'),
        date=step_xpath.format(ps_path='stepDateTm'),
        sources=step_xpath.format(ps_path='stepSrc/srcDesc')
    )

    def as_property(prop, xpath):
        """ :return: a ParserProperty for a complex prop, otherwise the xpath as it was given """

        if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS):
            return ParserProperty(self._parse_complex_list, self._update_complex_list)
        if prop in (BOUNDING_BOX, LARGER_WORKS):
            return ParserProperty(self._parse_complex, self._update_complex)
        if prop in ('attribute_accuracy', 'dataset_completeness'):
            return ParserProperty(self._parse_report_item, self._update_report_item)
        if prop == DATES:
            return ParserProperty(self._parse_dates, self._update_dates)
        if prop == DIGITAL_FORMS:
            return ParserProperty(self._parse_digital_forms, self._update_digital_forms)

        return xpath

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map (iterating over a copy)
    for prop, xpath in iteritems(dict(xpath_map)):
        xpath_map[prop] = as_property(prop, xpath)

    self._data_map = xpath_map
    self._data_structures = structures
def _init_data_map(self):
    """
    OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions

    Runs once: when self._data_map has already been assigned this returns immediately.
    Otherwise the XML root is validated, the complex XPATH structures (including raster
    info) are formatted from _fgdc_definitions and _fgdc_tag_formats, and the results
    are assigned to self._data_map and self._data_structures.

    :raises InvalidContent: if the XML root element name is not FGDC_ROOT
    """

    if self._data_map is not None:
        return  # Initiation happens once

    # Parse and validate the FGDC metadata root

    if self._xml_tree is None:
        fgdc_root = FGDC_ROOT
    else:
        fgdc_root = get_element_name(self._xml_tree)

    if fgdc_root != FGDC_ROOT:
        # Name the standard actually being validated so the error is not misleading
        raise InvalidContent('Invalid XML root for FGDC metadata: {root}', root=fgdc_root)

    fgdc_data_map = {'_root': FGDC_ROOT}
    fgdc_data_structures = {}

    # Capture and format other complex XPATHs

    ad_format = _fgdc_tag_formats[ATTRIBUTES]
    fgdc_data_structures[ATTRIBUTES] = format_xpaths(
        _fgdc_definitions[ATTRIBUTES],
        label=ad_format.format(ad_path='attrlabl'),
        aliases=ad_format.format(ad_path='attalias'),
        definition=ad_format.format(ad_path='attrdef'),
        definition_src=ad_format.format(ad_path='attrdefs')
    )

    bb_format = _fgdc_tag_formats[BOUNDING_BOX]
    fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
        _fgdc_definitions[BOUNDING_BOX],
        east=bb_format.format(bbox_path='eastbc'),
        south=bb_format.format(bbox_path='southbc'),
        west=bb_format.format(bbox_path='westbc'),
        north=bb_format.format(bbox_path='northbc')
    )

    ct_format = _fgdc_tag_formats[CONTACTS]
    fgdc_data_structures[CONTACTS] = format_xpaths(
        _fgdc_definitions[CONTACTS],

        name=ct_format.format(ct_path='cntperp/cntper'),
        _name=ct_format.format(ct_path='cntorgp/cntper'),  # If not in cntperp

        organization=ct_format.format(ct_path='cntperp/cntorg'),
        _organization=ct_format.format(ct_path='cntorgp/cntorg'),  # If not in cntperp

        position=ct_format.format(ct_path='cntpos'),
        email=ct_format.format(ct_path='cntemail')
    )

    dt_format = _fgdc_tag_formats[DATES]
    fgdc_data_structures[DATES] = {
        DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
        DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
        DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
        DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
    }

    # A date range is described by the pair of begin and end XPATHs
    fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
    ]

    df_format = _fgdc_tag_formats[DIGITAL_FORMS]
    fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
        _fgdc_definitions[DIGITAL_FORMS],
        name=df_format.format(df_path='digtinfo/formname'),
        content=df_format.format(df_path='digtinfo/formcont'),
        decompression=df_format.format(df_path='digtinfo/filedec'),
        version=df_format.format(df_path='digtinfo/formvern'),
        specification=df_format.format(df_path='digtinfo/formspec'),
        access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
        access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
        network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
    )

    lw_format = _fgdc_tag_formats[LARGER_WORKS]
    fgdc_data_structures[LARGER_WORKS] = format_xpaths(
        _fgdc_definitions[LARGER_WORKS],
        title=lw_format.format(lw_path='title'),
        edition=lw_format.format(lw_path='edition'),
        origin=lw_format.format(lw_path='origin'),
        online_linkage=lw_format.format(lw_path='onlink'),
        other_citation=lw_format.format(lw_path='othercit'),
        date=lw_format.format(lw_path='pubdate'),
        place=lw_format.format(lw_path='pubinfo/pubplace'),
        info=lw_format.format(lw_path='pubinfo/publish')
    )

    ps_format = _fgdc_tag_formats[PROCESS_STEPS]
    fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
        _fgdc_definitions[PROCESS_STEPS],
        description=ps_format.format(ps_path='procdesc'),
        date=ps_format.format(ps_path='procdate'),
        sources=ps_format.format(ps_path='srcused')
    )

    # Each resolution has a primary XPATH, and a '_' prefixed alternate built from a second root
    ri_format = _fgdc_tag_formats[RASTER_INFO]
    fgdc_data_structures[RASTER_INFO] = format_xpaths(
        _fgdc_definitions[RASTER_INFO],

        dimensions=ri_format.format(ri_path='rasttype'),
        row_count=ri_format.format(ri_path='rowcount'),
        column_count=ri_format.format(ri_path='colcount'),
        vertical_count=ri_format.format(ri_path='vrtcount'),

        x_resolution=_fgdc_tag_formats['_raster_resolution'] + '/absres',
        _x_resolution=_fgdc_tag_formats['__raster_resolution'] + '/longres',
        y_resolution=_fgdc_tag_formats['_raster_resolution'] + '/ordres',
        _y_resolution=_fgdc_tag_formats['__raster_resolution'] + '/latres',
    )

    # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map:
    # complex props are handled by parse / update methods, all others keep their XPATH

    fgdc_data_formats = dict(_fgdc_tag_formats)

    for prop, xpath in iteritems(fgdc_data_formats):
        if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

        elif prop in (BOUNDING_BOX, LARGER_WORKS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

        elif prop == DATES:
            fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

        elif prop == RASTER_INFO:
            # Raster info is parsed like any complex structure, but updated by its own method
            fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_raster_info)

        else:
            fgdc_data_map[prop] = xpath

    self._data_map = fgdc_data_map
    self._data_structures = fgdc_data_structures