def _init_data_map(self):
    """
    OVERRIDDEN: Extend the parent data map with two custom properties:
    a metadata language (simple XPATH with a backup location) and a list of
    metadata contacts (the shared CONTACTS structure plus a phone field)
    """

    # The parent builds self._data_map and self._data_structures, which are extended below
    super(CustomIsoParser, self)._init_data_map()

    language_key = "metadata_language"
    contacts_key = "metadata_contacts"

    # Simple property: the plain key is tried first, the underscore-prefixed key is the fallback
    self._data_map[language_key] = "language/CharacterString"
    self._data_map["_" + language_key] = "language/LanguageCode/@codeListValue"

    # Complex property: start from the CONTACTS definition and add a "phone" placeholder to it
    contact_xpath = "contact/CI_ResponsibleParty/{ct_path}"
    contact_definition = get_complex_definitions()[CONTACTS]
    contact_definition["phone"] = "{phone}"

    # Location of each contact field, relative to the responsible party element
    contact_tags = {
        "name": "individualName/CharacterString",
        "organization": "organisationName/CharacterString",
        "position": "positionName/CharacterString",
        "phone": "contactInfo/CI_Contact/phone/CI_Telephone/voice/CharacterString",
        "email": "contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString"
    }
    contact_xpaths = {
        field: contact_xpath.format(ct_path=tag) for field, tag in contact_tags.items()
    }
    self._data_structures[contacts_key] = format_xpaths(contact_definition, **contact_xpaths)

    # Register the root element for the contacts, then the parser/updater pair for the property
    self._data_map["_{prop}_root".format(prop=contacts_key)] = "contact"
    self._data_map[contacts_key] = ParserProperty(self._parse_complex_list, self._update_complex_list)

    # Finally, add both custom properties to the set of known metadata properties
    for custom_prop in (language_key, contacts_key):
        self._metadata_props.add(custom_prop)
def _init_data_map(self):
    """ OVERRIDDEN: Build the ISO-19115 data map (XPATHs and parser properties) and its complex structures """

    if self._data_map is not None:
        return  # Already initialized: the map is only built once

    # Without an XML tree the first known root is assumed; otherwise the tree's root must be a known one
    root_name = ISO_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)
    if root_name not in ISO_ROOTS:
        raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=root_name)

    xpath_map = {'_root': root_name}
    xpath_map.update(_iso_tag_roots)
    xpath_map.update(_iso_tag_formats)

    structures = {}

    # Attribute details: the definition source has three fallbacks (each with one more leading underscore)
    attr_xpath = xpath_map[ATTRIBUTES]
    attr_source = xpath_map['_attr_src']
    type_source = attr_source.replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

    structures[ATTRIBUTES] = format_xpaths(
        _iso_definitions[ATTRIBUTES],
        label=attr_xpath.format(ad_path='memberName/LocalName'),
        aliases=attr_xpath.format(ad_path='aliases/LocalName'),  # Not in spec
        definition=attr_xpath.format(ad_path='definition/CharacterString'),

        # Attribute-level source first: organisation, then individual
        definition_src=attr_source + '/organisationName/CharacterString',
        _definition_src=attr_source + '/individualName/CharacterString',

        # Then the feature type's source, assumed to be the same as the attribute's
        __definition_src=type_source + '/organisationName/CharacterString',
        ___definition_src=type_source + '/individualName/CharacterString'
    )

    # Bounding box
    bbox_xpath = xpath_map[BOUNDING_BOX]
    bbox_tags = {
        'east': 'eastBoundLongitude/Decimal',
        'south': 'southBoundLatitude/Decimal',
        'west': 'westBoundLongitude/Decimal',
        'north': 'northBoundLatitude/Decimal'
    }
    structures[BOUNDING_BOX] = format_xpaths(
        _iso_definitions[BOUNDING_BOX],
        **{side: bbox_xpath.format(bbox_path=tag) for side, tag in bbox_tags.items()}
    )

    # Contacts
    contact_xpath = xpath_map[CONTACTS]
    contact_tags = {
        'name': 'individualName/CharacterString',
        'organization': 'organisationName/CharacterString',
        'position': 'positionName/CharacterString',
        'email': 'contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
    }
    structures[CONTACTS] = format_xpaths(
        _iso_definitions[CONTACTS],
        **{field: contact_xpath.format(ct_path=tag) for field, tag in contact_tags.items()}
    )

    # Dates: single and multiple dates share one location; a range is the [begin, end] pair
    date_xpath = xpath_map[DATES]
    instant_xpath = date_xpath.format(type_path='TimeInstant/timePosition')
    begin_xpath = date_xpath.format(type_path='TimePeriod/begin/TimeInstant/timePosition')
    end_xpath = date_xpath.format(type_path='TimePeriod/end/TimeInstant/timePosition')

    structures[DATES] = {
        DATE_TYPE_MULTIPLE: instant_xpath,
        DATE_TYPE_RANGE_BEGIN: begin_xpath,
        DATE_TYPE_RANGE_END: end_xpath,
        DATE_TYPE_SINGLE: instant_xpath,
        DATE_TYPE_RANGE: [begin_xpath, end_xpath]
    }

    # Digital forms: the access and network XPATHs come straight from the map
    form_xpath = xpath_map[DIGITAL_FORMS]
    structures[DIGITAL_FORMS] = format_xpaths(
        _iso_definitions[DIGITAL_FORMS],
        name=form_xpath.format(df_path='name/CharacterString'),
        content='',  # Not supported in ISO-19115 (appending to spec)
        decompression=form_xpath.format(df_path='fileDecompressionTechnique/CharacterString'),
        version=form_xpath.format(df_path='version/CharacterString'),
        specification=form_xpath.format(df_path='specification/CharacterString'),
        access_desc=xpath_map['_access_desc'],
        access_instrs=xpath_map['_access_instrs'],
        network_resource=xpath_map['_network_resource']
    )

    # Keywords: every keyword property receives its own independent copy of the structure
    keyword_template = {
        'keyword_root': 'MD_Keywords/keyword',
        'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
        'keyword': 'MD_Keywords/keyword/CharacterString'
    }
    structures.update((keyword_prop, deepcopy(keyword_template)) for keyword_prop in KEYWORD_PROPS)

    # Larger works: fields are spread over the base, citation, linkage and contact XPATHs
    work_xpath = xpath_map[LARGER_WORKS]
    work_citation = xpath_map['_lw_citation']
    work_linkage = xpath_map['_lw_linkage']
    work_contact = xpath_map['_lw_contact']

    structures[LARGER_WORKS] = format_xpaths(
        _iso_definitions[LARGER_WORKS],
        title=work_xpath.format(lw_path='title/CharacterString'),
        edition=work_xpath.format(lw_path='edition/CharacterString'),
        origin=work_citation.format(lw_path='individualName/CharacterString'),
        online_linkage=work_linkage.format(lw_path='linkage/URL'),
        other_citation=work_xpath.format(lw_path='otherCitationDetails/CharacterString'),
        date=work_xpath.format(lw_path='editionDate/Date'),
        place=work_contact.format(lw_path='address/CI_Address/city/CharacterString'),
        info=work_citation.format(lw_path='organisationName/CharacterString')
    )

    # Process steps
    step_xpath = xpath_map[PROCESS_STEPS]
    structures[PROCESS_STEPS] = format_xpaths(
        _iso_definitions[PROCESS_STEPS],
        description=step_xpath.format(ps_path='description/CharacterString'),
        date=step_xpath.format(ps_path='dateTime/DateTime'),
        sources=step_xpath.format(
            ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
        )
    )

    # Raster info: formatted from the raster dimensions definition
    raster_xpath = xpath_map[RASTER_INFO]
    structures[RASTER_INFO] = format_xpaths(
        _iso_definitions[RASTER_DIMS],
        type=raster_xpath.format(ri_path='dimensionName/MD_DimensionNameTypeCode'),
        _type=raster_xpath.format(ri_path='dimensionName/MD_DimensionNameTypeCode/@codeListValue'),
        size=raster_xpath.format(ri_path='dimensionSize/Integer'),
        value=raster_xpath.format(ri_path='resolution/Measure'),
        units=raster_xpath.format(ri_path='resolution/Measure/@uom')
    )

    def to_map_value(prop, xpath):
        """ :return: a new ParserProperty for specially handled props, otherwise the XPATH unchanged """

        if prop == ATTRIBUTES:
            return ParserProperty(self._parse_attribute_details, self._update_attribute_details)
        if prop in (CONTACTS, PROCESS_STEPS):
            return ParserProperty(self._parse_complex_list, self._update_complex_list)
        if prop in (BOUNDING_BOX, LARGER_WORKS):
            return ParserProperty(self._parse_complex, self._update_complex)
        if prop == DATES:
            return ParserProperty(self._parse_dates, self._update_dates)
        if prop == DIGITAL_FORMS:
            return ParserProperty(self._parse_digital_forms, self._update_digital_forms)
        if prop in KEYWORD_PROPS:
            return ParserProperty(self._parse_keywords, self._update_keywords)
        if prop == RASTER_INFO:
            return ParserProperty(self._parse_raster_info, self._update_raster_info)

        return xpath

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map
    self._data_map = {prop: to_map_value(prop, xpath) for prop, xpath in iteritems(xpath_map)}
    self._data_structures = structures
# Supported in separate file ISO-19110: FC_FeatureCatalog ('_attr_root', 'FC_FeatureCatalogue'), ('_attr_base', 'featureType/FC_FeatureType/carrierOfCharacteristics/FC_FeatureAttribute'), ('_attr_def', '{_attr_base}/definitionReference/FC_DefinitionReference/definitionSource/FC_DefinitionSource'), ('_attr_src', '{_attr_def}/source/CI_Citation/citedResponsibleParty/CI_ResponsibleParty'), # References to separate file ISO-19110 from: MD_Metadata ('_attr_citation', 'contentInfo/MD_FeatureCatalogueDescription/featureCatalogueCitation'), ('_attr_contact', '{_attr_citation}/CI_Citation/citedResponsibleParty/CI_ResponsibleParty/contactInfo/CI_Contact'), ('_attr_contact_url', '{_attr_contact}/onlineResource/CI_OnlineResource/linkage/URL') )) # Two passes required because of self references within roots dict _iso_tag_roots.update(format_xpaths(_iso_tag_roots, **_iso_tag_roots)) _iso_tag_roots.update(format_xpaths(_iso_tag_roots, **_iso_tag_roots)) _iso_tag_formats = { # Property-specific xpath roots: the base from which each element repeats '_attribute_accuracy_root': '{_dataqual_report}', '_attributes_root': 'featureType/FC_FeatureType/carrierOfCharacteristics', '_bounding_box_root': '{_idinfo_extent}/geographicElement', '_contacts_root': '{_idinfo}/pointOfContact', '_dataset_completeness_root': '{_dataqual_report}', '_dates_root': '{_idinfo_extent}/temporalElement', '_digital_forms_root': '{_distinfo}/distributionFormat', '_dist_liability_root': '{_idinfo}/resourceConstraints', '_transfer_options_root': '{_distinfo}/transferOptions/MD_DigitalTransferOptions/onLine', '_keywords_root': '{_idinfo}/descriptiveKeywords', '_larger_works_root': '{_idinfo_aggregate_citation}',
def _init_data_map(self):
    """
    OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions
    :raises InvalidContent: if an XML tree is present and its root element is not FGDC_ROOT
    """

    if self._data_map is not None:
        return  # Initiation happens once

    # Parse and validate the FGDC metadata root
    fgdc_root = FGDC_ROOT if self._xml_tree is None else get_element_name(self._xml_tree)
    if fgdc_root != FGDC_ROOT:
        # The message names the FGDC standard: it previously (and wrongly) referred to ISO-19115
        raise InvalidContent('Invalid XML root for FGDC standard: {root}', root=fgdc_root)

    fgdc_data_map = {'_root': FGDC_ROOT}
    fgdc_data_structures = {}

    # Capture and format complex XPATHs

    ad_format = _fgdc_tag_formats[ATTRIBUTES]
    fgdc_data_structures[ATTRIBUTES] = format_xpaths(
        _fgdc_definitions[ATTRIBUTES],
        label=ad_format.format(ad_path='attrlabl'),
        aliases=ad_format.format(ad_path='attalias'),
        definition=ad_format.format(ad_path='attrdef'),
        definition_src=ad_format.format(ad_path='attrdefs')
    )

    bb_format = _fgdc_tag_formats[BOUNDING_BOX]
    fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
        _fgdc_definitions[BOUNDING_BOX],
        east=bb_format.format(bbox_path='eastbc'),
        south=bb_format.format(bbox_path='southbc'),
        west=bb_format.format(bbox_path='westbc'),
        north=bb_format.format(bbox_path='northbc')
    )

    ct_format = _fgdc_tag_formats[CONTACTS]
    fgdc_data_structures[CONTACTS] = format_xpaths(
        _fgdc_definitions[CONTACTS],

        # The underscore-prefixed XPATHs are the fallback used if the value is not in cntperp
        name=ct_format.format(ct_path='cntperp/cntper'),
        _name=ct_format.format(ct_path='cntorgp/cntper'),
        organization=ct_format.format(ct_path='cntperp/cntorg'),
        _organization=ct_format.format(ct_path='cntorgp/cntorg'),

        position=ct_format.format(ct_path='cntpos'),
        email=ct_format.format(ct_path='cntemail')
    )

    dt_format = _fgdc_tag_formats[DATES]
    fgdc_data_structures[DATES] = {
        DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
        DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
        DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
        DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
    }
    # A date range is described by the [begin, end] pair of XPATHs
    fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
    ]

    df_format = _fgdc_tag_formats[DIGITAL_FORMS]
    fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
        _fgdc_definitions[DIGITAL_FORMS],
        name=df_format.format(df_path='digtinfo/formname'),
        content=df_format.format(df_path='digtinfo/formcont'),
        decompression=df_format.format(df_path='digtinfo/filedec'),
        version=df_format.format(df_path='digtinfo/formvern'),
        specification=df_format.format(df_path='digtinfo/formspec'),
        access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
        access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
        network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
    )

    lw_format = _fgdc_tag_formats[LARGER_WORKS]
    fgdc_data_structures[LARGER_WORKS] = format_xpaths(
        _fgdc_definitions[LARGER_WORKS],
        title=lw_format.format(lw_path='title'),
        edition=lw_format.format(lw_path='edition'),
        origin=lw_format.format(lw_path='origin'),
        online_linkage=lw_format.format(lw_path='onlink'),
        other_citation=lw_format.format(lw_path='othercit'),
        date=lw_format.format(lw_path='pubdate'),
        place=lw_format.format(lw_path='pubinfo/pubplace'),
        info=lw_format.format(lw_path='pubinfo/publish')
    )

    ps_format = _fgdc_tag_formats[PROCESS_STEPS]
    fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
        _fgdc_definitions[PROCESS_STEPS],
        description=ps_format.format(ps_path='procdesc'),
        date=ps_format.format(ps_path='procdate'),
        sources=ps_format.format(ps_path='srcused')
    )

    # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map:
    # complex props get a parser/updater pair, all others keep their XPATH
    for prop, xpath in iteritems(_fgdc_tag_formats):
        if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)
        elif prop in (BOUNDING_BOX, LARGER_WORKS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)
        elif prop == DATES:
            fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)
        else:
            fgdc_data_map[prop] = xpath

    self._data_map = fgdc_data_map
    self._data_structures = fgdc_data_structures
def _init_data_map(self):
    """
    OVERRIDDEN: Initialize required ISO-19115 data map with XPATHS and specialized functions
    :raises InvalidContent: if an XML tree is present and its root element is not in ISO_ROOTS
    """

    if self._data_map is not None:
        return  # Initiation happens once

    # Parse and validate the ISO metadata root
    iso_root = ISO_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)
    if iso_root not in ISO_ROOTS:
        raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=iso_root)

    iso_data_map = {'_root': iso_root}
    iso_data_map.update(_iso_tag_roots)
    iso_data_map.update(_iso_tag_formats)

    iso_data_structures = {}

    # Capture and format complex XPATHs

    ad_format = iso_data_map[ATTRIBUTES]
    attr_source = iso_data_map['_attr_src']
    ft_source = attr_source.replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

    iso_data_structures[ATTRIBUTES] = format_xpaths(
        _iso_definitions[ATTRIBUTES],
        label=ad_format.format(ad_path='memberName/LocalName'),
        aliases=ad_format.format(ad_path='aliases/LocalName'),  # Not in spec
        definition=ad_format.format(ad_path='definition/CharacterString'),

        # First try to populate attribute definition source from FC_FeatureAttribute
        definition_src=attr_source + '/organisationName/CharacterString',
        _definition_src=attr_source + '/individualName/CharacterString',

        # Then assume feature type source is the same as attribute: populate from FC_FeatureType
        __definition_src=ft_source + '/organisationName/CharacterString',
        ___definition_src=ft_source + '/individualName/CharacterString'
    )

    bb_format = iso_data_map[BOUNDING_BOX]
    iso_data_structures[BOUNDING_BOX] = format_xpaths(
        _iso_definitions[BOUNDING_BOX],
        east=bb_format.format(bbox_path='eastBoundLongitude/Decimal'),
        south=bb_format.format(bbox_path='southBoundLatitude/Decimal'),
        west=bb_format.format(bbox_path='westBoundLongitude/Decimal'),
        north=bb_format.format(bbox_path='northBoundLatitude/Decimal')
    )

    ct_format = iso_data_map[CONTACTS]
    iso_data_structures[CONTACTS] = format_xpaths(
        _iso_definitions[CONTACTS],
        name=ct_format.format(ct_path='individualName/CharacterString'),
        organization=ct_format.format(ct_path='organisationName/CharacterString'),
        position=ct_format.format(ct_path='positionName/CharacterString'),
        email=ct_format.format(
            ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
        )
    )

    dt_format = iso_data_map[DATES]
    iso_data_structures[DATES] = {
        DATE_TYPE_MULTIPLE: dt_format.format(type_path='TimeInstant/timePosition'),
        DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TimePeriod/begin/TimeInstant/timePosition'),
        DATE_TYPE_RANGE_END: dt_format.format(type_path='TimePeriod/end/TimeInstant/timePosition'),
        DATE_TYPE_SINGLE: dt_format.format(type_path='TimeInstant/timePosition')  # Same as multiple
    }
    # A date range is described by the [begin, end] pair of XPATHs
    iso_data_structures[DATES][DATE_TYPE_RANGE] = [
        iso_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
        iso_data_structures[DATES][DATE_TYPE_RANGE_END]
    ]

    df_format = iso_data_map[DIGITAL_FORMS]
    iso_data_structures[DIGITAL_FORMS] = format_xpaths(
        _iso_definitions[DIGITAL_FORMS],
        name=df_format.format(df_path='name/CharacterString'),
        content='',  # Not supported in ISO-19115 (appending to spec)
        decompression=df_format.format(df_path='fileDecompressionTechnique/CharacterString'),
        version=df_format.format(df_path='version/CharacterString'),
        specification=df_format.format(df_path='specification/CharacterString'),
        access_desc=iso_data_map['_access_desc'],
        access_instrs=iso_data_map['_access_instrs'],
        network_resource=iso_data_map['_network_resource']
    )

    keywords_structure = {
        'keyword_root': 'MD_Keywords/keyword',
        'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
        'keyword': 'MD_Keywords/keyword/CharacterString'
    }
    # Each keyword property gets its own copy: sharing one dict would let a change made
    # to one property's structure silently alter the other's (values are plain strings,
    # so a shallow copy is sufficient)
    for keyword_prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
        iso_data_structures[keyword_prop] = dict(keywords_structure)

    lw_format = iso_data_map[LARGER_WORKS]
    iso_data_structures[LARGER_WORKS] = format_xpaths(
        _iso_definitions[LARGER_WORKS],
        title=lw_format.format(lw_path='title/CharacterString'),
        edition=lw_format.format(lw_path='edition/CharacterString'),
        origin=iso_data_map['_lw_citation'].format(lw_path='individualName/CharacterString'),
        online_linkage=iso_data_map['_lw_linkage'].format(lw_path='linkage/URL'),
        other_citation=lw_format.format(lw_path='otherCitationDetails/CharacterString'),
        date=lw_format.format(lw_path='editionDate/Date'),
        place=iso_data_map['_lw_contact'].format(lw_path='address/CI_Address/city/CharacterString'),
        info=iso_data_map['_lw_citation'].format(lw_path='organisationName/CharacterString')
    )

    ps_format = iso_data_map[PROCESS_STEPS]
    iso_data_structures[PROCESS_STEPS] = format_xpaths(
        _iso_definitions[PROCESS_STEPS],
        description=ps_format.format(ps_path='description/CharacterString'),
        date=ps_format.format(ps_path='dateTime/DateTime'),
        sources=ps_format.format(
            ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
        )
    )

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map
    # (iterate over a copy, since values of the map itself are replaced in the loop)
    for prop, xpath in iteritems(dict(iso_data_map)):
        if prop == ATTRIBUTES:
            iso_data_map[prop] = ParserProperty(self._parse_attribute_details, self._update_attribute_details)
        elif prop in (CONTACTS, PROCESS_STEPS):
            iso_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)
        elif prop in (BOUNDING_BOX, LARGER_WORKS):
            iso_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)
        elif prop == DATES:
            iso_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)
        elif prop == DIGITAL_FORMS:
            iso_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms)
        elif prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
            iso_data_map[prop] = ParserProperty(self._parse_keywords, self._update_keywords)
        else:
            iso_data_map[prop] = xpath

    self._data_map = iso_data_map
    self._data_structures = iso_data_structures
def _init_data_map(self):
    """ OVERRIDDEN: Build the ArcGIS data map (XPATHs and parser properties) and its complex structures """

    if self._data_map is not None:
        return  # Already initialized: the map is only built once

    # Without an XML tree the first known root is assumed (uncapitalized, per the original note);
    # otherwise the tree's root must be one of the known ArcGIS roots
    root_name = ARCGIS_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)
    if root_name not in ARCGIS_ROOTS:
        raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=root_name)

    xpath_map = {'_root': root_name}
    xpath_map.update(_agis_tag_formats)

    structures = {}

    # Attribute details
    attr_xpath = xpath_map[ATTRIBUTES]
    attr_tags = {
        'label': 'attrlabl',
        'aliases': 'attalias',
        'definition': 'attrdef',
        'definition_src': 'attrdefs'
    }
    structures[ATTRIBUTES] = format_xpaths(
        _agis_definitions[ATTRIBUTES],
        **{field: attr_xpath.format(ad_path=tag) for field, tag in attr_tags.items()}
    )

    # Bounding box
    bbox_xpath = xpath_map[BOUNDING_BOX]
    bbox_tags = {'east': 'eastBL', 'south': 'southBL', 'west': 'westBL', 'north': 'northBL'}
    structures[BOUNDING_BOX] = format_xpaths(
        _agis_definitions[BOUNDING_BOX],
        **{side: bbox_xpath.format(bbox_path=tag) for side, tag in bbox_tags.items()}
    )

    # Contacts
    contact_xpath = xpath_map[CONTACTS]
    contact_tags = {
        'name': 'rpIndName',
        'organization': 'rpOrgName',
        'position': 'rpPosName',
        'email': 'rpCntInfo/cntAddress/eMailAdd'
    }
    structures[CONTACTS] = format_xpaths(
        _agis_definitions[CONTACTS],
        **{field: contact_xpath.format(ct_path=tag) for field, tag in contact_tags.items()}
    )

    # Dates: each type has an element XPATH and an underscore-prefixed variant reading its @date attribute
    date_xpath = xpath_map[DATES]
    date_tags = (
        (DATE_TYPE_MULTIPLE, 'TM_Instant/tmPosition', 'TM_Instant/tmPosition/@date'),
        (DATE_TYPE_RANGE_BEGIN, 'TM_Period/tmBegin', 'TM_Period/tmBegin/@date'),
        (DATE_TYPE_RANGE_END, 'TM_Period/tmEnd', 'TM_Period/tmEnd/@date'),
        # Same as multiple dates, but will contain only one
        (DATE_TYPE_SINGLE, 'TM_Instant/tmPosition', 'TM_Instant/tmPosition/@date')
    )

    date_structure = {}
    for date_type, element_tag, attribute_tag in date_tags:
        date_structure[date_type] = date_xpath.format(type_path=element_tag)
        date_structure['_' + date_type] = date_xpath.format(type_path=attribute_tag)

    # A date range is the [begin, end] pair, for both the element and the attribute variants
    date_structure[DATE_TYPE_RANGE] = [
        date_structure[DATE_TYPE_RANGE_BEGIN], date_structure[DATE_TYPE_RANGE_END]
    ]
    date_structure['_' + DATE_TYPE_RANGE] = [
        date_structure['_' + DATE_TYPE_RANGE_BEGIN], date_structure['_' + DATE_TYPE_RANGE_END]
    ]
    structures[DATES] = date_structure

    # Digital forms: the access and network XPATHs come straight from the map
    form_xpath = xpath_map[DIGITAL_FORMS]
    structures[DIGITAL_FORMS] = format_xpaths(
        _agis_definitions[DIGITAL_FORMS],
        name=form_xpath.format(df_path='formatName'),
        content=form_xpath.format(df_path='formatInfo'),
        decompression=form_xpath.format(df_path='fileDecmTech'),
        version=form_xpath.format(df_path='formatVer'),
        specification=form_xpath.format(df_path='formatSpec'),
        access_desc=xpath_map['_access_desc'],
        access_instrs=xpath_map['_access_instrs'],
        network_resource=xpath_map['_network_resource']
    )

    # Larger works
    work_xpath = xpath_map[LARGER_WORKS]
    work_tags = {
        'title': 'resTitle',
        'edition': 'resEd',
        'origin': 'citRespParty/rpIndName',
        'online_linkage': 'citRespParty/rpCntInfo/cntOnlineRes/linkage',
        'other_citation': 'otherCitDet',
        'date': 'date/pubDate',
        'place': 'citRespParty/rpCntInfo/cntAddress/city',
        'info': 'citRespParty/rpOrgName'
    }
    structures[LARGER_WORKS] = format_xpaths(
        _agis_definitions[LARGER_WORKS],
        **{field: work_xpath.format(lw_path=tag) for field, tag in work_tags.items()}
    )

    # Process steps
    step_xpath = xpath_map[PROCESS_STEPS]
    step_tags = {'description': 'stepDesc', 'date': 'stepDateTm', 'sources': 'stepSrc/srcDesc'}
    structures[PROCESS_STEPS] = format_xpaths(
        _agis_definitions[PROCESS_STEPS],
        **{field: step_xpath.format(ps_path=tag) for field, tag in step_tags.items()}
    )

    # Raster info: formatted from the raster dimensions definition
    raster_xpath = xpath_map[RASTER_INFO]
    raster_tags = {
        'type': '@type',
        'size': 'dimSize',
        'value': 'dimResol/value',
        'units': 'dimResol/value/@uom'
    }
    structures[RASTER_INFO] = format_xpaths(
        _agis_definitions[RASTER_DIMS],
        **{field: raster_xpath.format(ri_path=tag) for field, tag in raster_tags.items()}
    )

    def to_map_value(prop, xpath):
        """ :return: a new ParserProperty for specially handled props, otherwise the XPATH unchanged """

        if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS):
            return ParserProperty(self._parse_complex_list, self._update_complex_list)
        if prop in (BOUNDING_BOX, LARGER_WORKS):
            return ParserProperty(self._parse_complex, self._update_complex)
        if prop in ('attribute_accuracy', 'dataset_completeness'):
            return ParserProperty(self._parse_report_item, self._update_report_item)
        if prop == DATES:
            return ParserProperty(self._parse_dates, self._update_dates)
        if prop == DIGITAL_FORMS:
            return ParserProperty(self._parse_digital_forms, self._update_digital_forms)
        if prop == RASTER_INFO:
            return ParserProperty(self._parse_raster_info, self._update_raster_info)

        return xpath

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map
    self._data_map = {prop: to_map_value(prop, xpath) for prop, xpath in iteritems(xpath_map)}
    self._data_structures = structures
def _init_data_map(self):
    """ OVERRIDDEN: Build the ArcGIS data map (XPATHs and parser properties) and its complex structures """

    if self._data_map is not None:
        return  # Already initialized: the map is only built once

    # Without an XML tree the first known root is assumed (uncapitalized, per the original note);
    # otherwise the tree's root must be one of the known ArcGIS roots
    root_name = ARCGIS_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)
    if root_name not in ARCGIS_ROOTS:
        raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=root_name)

    xpath_map = {'_root': root_name}
    xpath_map.update(_agis_tag_formats)

    structures = {}

    # Attribute details
    attr_xpath = xpath_map[ATTRIBUTES]
    attr_tags = {
        'label': 'attrlabl',
        'aliases': 'attalias',
        'definition': 'attrdef',
        'definition_src': 'attrdefs'
    }
    structures[ATTRIBUTES] = format_xpaths(
        _agis_definitions[ATTRIBUTES],
        **{field: attr_xpath.format(ad_path=tag) for field, tag in attr_tags.items()}
    )

    # Bounding box
    bbox_xpath = xpath_map[BOUNDING_BOX]
    bbox_tags = {'east': 'eastBL', 'south': 'southBL', 'west': 'westBL', 'north': 'northBL'}
    structures[BOUNDING_BOX] = format_xpaths(
        _agis_definitions[BOUNDING_BOX],
        **{side: bbox_xpath.format(bbox_path=tag) for side, tag in bbox_tags.items()}
    )

    # Contacts
    contact_xpath = xpath_map[CONTACTS]
    contact_tags = {
        'name': 'rpIndName',
        'organization': 'rpOrgName',
        'position': 'rpPosName',
        'email': 'rpCntInfo/cntAddress/eMailAdd'
    }
    structures[CONTACTS] = format_xpaths(
        _agis_definitions[CONTACTS],
        **{field: contact_xpath.format(ct_path=tag) for field, tag in contact_tags.items()}
    )

    # Dates: each type has an element XPATH and an underscore-prefixed variant reading its @date attribute
    date_xpath = xpath_map[DATES]
    date_tags = (
        (DATE_TYPE_MULTIPLE, 'TM_Instant/tmPosition', 'TM_Instant/tmPosition/@date'),
        (DATE_TYPE_RANGE_BEGIN, 'TM_Period/tmBegin', 'TM_Period/tmBegin/@date'),
        (DATE_TYPE_RANGE_END, 'TM_Period/tmEnd', 'TM_Period/tmEnd/@date'),
        # Same as multiple dates, but will contain only one
        (DATE_TYPE_SINGLE, 'TM_Instant/tmPosition', 'TM_Instant/tmPosition/@date')
    )

    date_structure = {}
    for date_type, element_tag, attribute_tag in date_tags:
        date_structure[date_type] = date_xpath.format(type_path=element_tag)
        date_structure['_' + date_type] = date_xpath.format(type_path=attribute_tag)

    # A date range is the [begin, end] pair, for both the element and the attribute variants
    date_structure[DATE_TYPE_RANGE] = [
        date_structure[DATE_TYPE_RANGE_BEGIN], date_structure[DATE_TYPE_RANGE_END]
    ]
    date_structure['_' + DATE_TYPE_RANGE] = [
        date_structure['_' + DATE_TYPE_RANGE_BEGIN], date_structure['_' + DATE_TYPE_RANGE_END]
    ]
    structures[DATES] = date_structure

    # Digital forms: the access and network XPATHs come straight from the map
    form_xpath = xpath_map[DIGITAL_FORMS]
    structures[DIGITAL_FORMS] = format_xpaths(
        _agis_definitions[DIGITAL_FORMS],
        name=form_xpath.format(df_path='formatName'),
        content=form_xpath.format(df_path='formatInfo'),
        decompression=form_xpath.format(df_path='fileDecmTech'),
        version=form_xpath.format(df_path='formatVer'),
        specification=form_xpath.format(df_path='formatSpec'),
        access_desc=xpath_map['_access_desc'],
        access_instrs=xpath_map['_access_instrs'],
        network_resource=xpath_map['_network_resource']
    )

    # Larger works
    work_xpath = xpath_map[LARGER_WORKS]
    work_tags = {
        'title': 'resTitle',
        'edition': 'resEd',
        'origin': 'citRespParty/rpIndName',
        'online_linkage': 'citRespParty/rpCntInfo/cntOnlineRes/linkage',
        'other_citation': 'otherCitDet',
        'date': 'date/pubDate',
        'place': 'citRespParty/rpCntInfo/cntAddress/city',
        'info': 'citRespParty/rpOrgName'
    }
    structures[LARGER_WORKS] = format_xpaths(
        _agis_definitions[LARGER_WORKS],
        **{field: work_xpath.format(lw_path=tag) for field, tag in work_tags.items()}
    )

    # Process steps
    step_xpath = xpath_map[PROCESS_STEPS]
    step_tags = {'description': 'stepDesc', 'date': 'stepDateTm', 'sources': 'stepSrc/srcDesc'}
    structures[PROCESS_STEPS] = format_xpaths(
        _agis_definitions[PROCESS_STEPS],
        **{field: step_xpath.format(ps_path=tag) for field, tag in step_tags.items()}
    )

    def to_map_value(prop, xpath):
        """ :return: a new ParserProperty for specially handled props, otherwise the XPATH unchanged """

        if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS):
            return ParserProperty(self._parse_complex_list, self._update_complex_list)
        if prop in (BOUNDING_BOX, LARGER_WORKS):
            return ParserProperty(self._parse_complex, self._update_complex)
        if prop in ('attribute_accuracy', 'dataset_completeness'):
            return ParserProperty(self._parse_report_item, self._update_report_item)
        if prop == DATES:
            return ParserProperty(self._parse_dates, self._update_dates)
        if prop == DIGITAL_FORMS:
            return ParserProperty(self._parse_digital_forms, self._update_digital_forms)

        return xpath

    # Assign XPATHS and gis_metadata.utils.ParserProperties to data map
    self._data_map = {prop: to_map_value(prop, xpath) for prop, xpath in iteritems(xpath_map)}
    self._data_structures = structures
def _init_data_map(self):
    """
    OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions
    :raises InvalidContent: if an XML tree is present and its root element is not FGDC_ROOT
    """

    if self._data_map is not None:
        return  # Initiation happens once

    # Parse and validate the FGDC metadata root
    fgdc_root = FGDC_ROOT if self._xml_tree is None else get_element_name(self._xml_tree)
    if fgdc_root != FGDC_ROOT:
        # The message names the FGDC standard: it previously (and wrongly) referred to ISO-19115
        raise InvalidContent('Invalid XML root for FGDC standard: {root}', root=fgdc_root)

    fgdc_data_map = {'_root': FGDC_ROOT}
    fgdc_data_structures = {}

    # Capture and format complex XPATHs

    ad_format = _fgdc_tag_formats[ATTRIBUTES]
    fgdc_data_structures[ATTRIBUTES] = format_xpaths(
        _fgdc_definitions[ATTRIBUTES],
        label=ad_format.format(ad_path='attrlabl'),
        aliases=ad_format.format(ad_path='attalias'),
        definition=ad_format.format(ad_path='attrdef'),
        definition_src=ad_format.format(ad_path='attrdefs')
    )

    bb_format = _fgdc_tag_formats[BOUNDING_BOX]
    fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
        _fgdc_definitions[BOUNDING_BOX],
        east=bb_format.format(bbox_path='eastbc'),
        south=bb_format.format(bbox_path='southbc'),
        west=bb_format.format(bbox_path='westbc'),
        north=bb_format.format(bbox_path='northbc')
    )

    ct_format = _fgdc_tag_formats[CONTACTS]
    fgdc_data_structures[CONTACTS] = format_xpaths(
        _fgdc_definitions[CONTACTS],

        # The underscore-prefixed XPATHs are the fallback used if the value is not in cntperp
        name=ct_format.format(ct_path='cntperp/cntper'),
        _name=ct_format.format(ct_path='cntorgp/cntper'),
        organization=ct_format.format(ct_path='cntperp/cntorg'),
        _organization=ct_format.format(ct_path='cntorgp/cntorg'),

        position=ct_format.format(ct_path='cntpos'),
        email=ct_format.format(ct_path='cntemail')
    )

    dt_format = _fgdc_tag_formats[DATES]
    fgdc_data_structures[DATES] = {
        DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
        DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
        DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
        DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
    }
    # A date range is described by the [begin, end] pair of XPATHs
    fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
        fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
    ]

    df_format = _fgdc_tag_formats[DIGITAL_FORMS]
    fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
        _fgdc_definitions[DIGITAL_FORMS],
        name=df_format.format(df_path='digtinfo/formname'),
        content=df_format.format(df_path='digtinfo/formcont'),
        decompression=df_format.format(df_path='digtinfo/filedec'),
        version=df_format.format(df_path='digtinfo/formvern'),
        specification=df_format.format(df_path='digtinfo/formspec'),
        access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
        access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
        network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
    )

    lw_format = _fgdc_tag_formats[LARGER_WORKS]
    fgdc_data_structures[LARGER_WORKS] = format_xpaths(
        _fgdc_definitions[LARGER_WORKS],
        title=lw_format.format(lw_path='title'),
        edition=lw_format.format(lw_path='edition'),
        origin=lw_format.format(lw_path='origin'),
        online_linkage=lw_format.format(lw_path='onlink'),
        other_citation=lw_format.format(lw_path='othercit'),
        date=lw_format.format(lw_path='pubdate'),
        place=lw_format.format(lw_path='pubinfo/pubplace'),
        info=lw_format.format(lw_path='pubinfo/publish')
    )

    ps_format = _fgdc_tag_formats[PROCESS_STEPS]
    fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
        _fgdc_definitions[PROCESS_STEPS],
        description=ps_format.format(ps_path='procdesc'),
        date=ps_format.format(ps_path='procdate'),
        sources=ps_format.format(ps_path='srcused')
    )

    # Raster info: each resolution has an underscore-prefixed alternative that is
    # built from the '__raster_resolution' XPATH instead of '_raster_resolution'
    ri_format = _fgdc_tag_formats[RASTER_INFO]
    raster_resolution = _fgdc_tag_formats['_raster_resolution']
    raster_resolution_alt = _fgdc_tag_formats['__raster_resolution']

    fgdc_data_structures[RASTER_INFO] = format_xpaths(
        _fgdc_definitions[RASTER_INFO],
        dimensions=ri_format.format(ri_path='rasttype'),
        row_count=ri_format.format(ri_path='rowcount'),
        column_count=ri_format.format(ri_path='colcount'),
        vertical_count=ri_format.format(ri_path='vrtcount'),
        x_resolution=raster_resolution + '/absres',
        _x_resolution=raster_resolution_alt + '/longres',
        y_resolution=raster_resolution + '/ordres',
        _y_resolution=raster_resolution_alt + '/latres'
    )

    # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map:
    # complex props get a parser/updater pair, all others keep their XPATH
    for prop, xpath in iteritems(_fgdc_tag_formats):
        if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)
        elif prop in (BOUNDING_BOX, LARGER_WORKS):
            fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)
        elif prop == DATES:
            fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)
        elif prop == RASTER_INFO:
            # Raster info is parsed like any complex structure, but has its own updater
            fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_raster_info)
        else:
            fgdc_data_map[prop] = xpath

    self._data_map = fgdc_data_map
    self._data_structures = fgdc_data_structures