def transform_to_iso(self, original_document, original_format, harvest_object):
    """Convert an FGDC document to ISO through the configured web service.

    Only documents whose ``original_format`` is ``'fgdc'`` are handled. The
    document is validated, cleaned up (XML comments, ``ptvctcnt`` elements
    without text and every ``placekt`` element but the first are removed)
    and then POSTed to the URL read from the
    ``ckanext.geodatagov.fgdc2iso_service`` config option.

    :param original_document: the FGDC XML document to transform
    :param original_format: format identifier; any value other than
        ``'fgdc'`` makes the method return ``None`` immediately
    :param harvest_object: object that errors are recorded against through
        ``self._save_object_error``
    :returns: the body of the service response when it answers with HTTP
        status 200, ``None`` in every other case
    """
    if original_format != 'fgdc':
        return None

    transform_service = config.get('ckanext.geodatagov.fgdc2iso_service')
    if not transform_service:
        self._save_object_error('No FGDC to ISO transformation service',
                                harvest_object, 'Import')
        return None

    # Validate against FGDC schema
    profiles = self.source_config.get('validator_profiles') or ['fgdc_minimal']
    validator = Validators(profiles=profiles)
    for custom_validator in custom_validators:
        validator.add_validator(custom_validator)

    is_valid, _profile, _errors = self._validate_document(
        original_document, harvest_object, validator=validator)
    if not is_valid:
        # TODO: Provide an option to continue anyway
        return None

    # Drop every `<?xml ... ?>` declaration before parsing. The pattern is a
    # raw string (no invalid `\?` escape) and non-greedy, so a match stops at
    # the first `?>` instead of swallowing everything up to the last one on
    # the same line.
    original_document = re.sub(r'<\?xml(.*?)\?>', '', original_document)
    tree = etree.fromstring(original_document)

    # Parents are compared against None explicitly: truth-testing an lxml
    # element checks whether it has children and is deprecated.
    # NOTE(review): lxml's remove() appears to discard the removed node's
    # tail text as well - confirm that is acceptable for mixed content.
    for comment in tree.xpath('//comment()'):
        parent = comment.getparent()
        if parent is not None:
            parent.remove(comment)

    for node in tree.xpath('//ptvctcnt'):
        parent = node.getparent()
        if parent is not None and not node.text:
            parent.remove(node)

    # Remove all but the first placekt element
    for num, node in enumerate(tree.xpath('//placekt')):
        parent = node.getparent()
        if parent is not None and num > 0:
            parent.remove(node)

    # tostring() may already return bytes; only encode a text result so the
    # request body is always a byte string.
    payload = etree.tostring(tree)
    if not isinstance(payload, bytes):
        payload = payload.encode('utf8')

    response = requests.post(
        transform_service,
        data=payload,
        headers={'content-type': 'text/xml; charset=utf-8'})

    if response.status_code == 200:
        # XML coming from the conversion tool is already declared and
        # encoded as utf-8
        return response.content

    # Fill in the object placeholder, which used to be left as a literal
    # "{0}" in the saved error. Assumes harvest_object exposes an `id`
    # attribute; falls back to the object itself otherwise - TODO confirm.
    msg = 'The transformation service returned an error for object {0}'.format(
        getattr(harvest_object, 'id', harvest_object))
    if response.status_code and response.content:
        msg += ': [{0}] {1}'.format(response.status_code, response.content)
    elif getattr(response, 'error', None):
        # `error` is not present on every requests Response object, hence
        # the guarded lookup above.
        msg += ': {0}'.format(response.error)
    self._save_object_error(msg, harvest_object, 'Import')
    return None
def transform_to_iso(self, original_document, original_format, harvest_object):
    """Convert an FGDC document to ISO through the configured web service.

    Only documents whose ``original_format`` is ``'fgdc'`` are handled. The
    document is validated, cleaned up (XML comments, ``ptvctcnt`` elements
    without text and every ``placekt`` element but the first are removed)
    and then POSTed to the URL read from the
    ``ckanext.geodatagov.fgdc2iso_service`` config option.

    :param original_document: the FGDC XML document to transform
    :param original_format: format identifier; any value other than
        ``'fgdc'`` makes the method return ``None`` immediately
    :param harvest_object: object that errors are recorded against through
        ``self._save_object_error``
    :returns: the body of the service response when it answers with HTTP
        status 200, ``None`` in every other case
    """
    if original_format != 'fgdc':
        return None

    transform_service = config.get('ckanext.geodatagov.fgdc2iso_service')
    if not transform_service:
        self._save_object_error('No FGDC to ISO transformation service',
                                harvest_object, 'Import')
        return None

    # Validate against FGDC schema
    profiles = self.source_config.get('validator_profiles') or ['fgdc_minimal']
    validator = Validators(profiles=profiles)
    for custom_validator in custom_validators:
        validator.add_validator(custom_validator)

    is_valid, _profile, _errors = self._validate_document(
        original_document, harvest_object, validator=validator)
    if not is_valid:
        # TODO: Provide an option to continue anyway
        return None

    # Drop every `<?xml ... ?>` declaration before parsing. The pattern is a
    # raw string (no invalid `\?` escape) and non-greedy, so a match stops at
    # the first `?>` instead of swallowing everything up to the last one on
    # the same line.
    original_document = re.sub(r'<\?xml(.*?)\?>', '', original_document)
    tree = etree.fromstring(original_document)

    # Parents are compared against None explicitly: truth-testing an lxml
    # element checks whether it has children and is deprecated.
    # NOTE(review): lxml's remove() appears to discard the removed node's
    # tail text as well - confirm that is acceptable for mixed content.
    for comment in tree.xpath('//comment()'):
        parent = comment.getparent()
        if parent is not None:
            parent.remove(comment)

    for node in tree.xpath('//ptvctcnt'):
        parent = node.getparent()
        if parent is not None and not node.text:
            parent.remove(node)

    # Remove all but the first placekt element
    for num, node in enumerate(tree.xpath('//placekt')):
        parent = node.getparent()
        if parent is not None and num > 0:
            parent.remove(node)

    # tostring() may already return bytes; only encode a text result so the
    # request body is always a byte string.
    payload = etree.tostring(tree)
    if not isinstance(payload, bytes):
        payload = payload.encode('utf8')

    response = requests.post(
        transform_service,
        data=payload,
        headers={'content-type': 'text/xml; charset=utf-8'})

    if response.status_code == 200:
        # XML coming from the conversion tool is already declared and
        # encoded as utf-8
        return response.content

    # Fill in the object placeholder, which used to be left as a literal
    # "{0}" in the saved error. Assumes harvest_object exposes an `id`
    # attribute; falls back to the object itself otherwise - TODO confirm.
    msg = 'The transformation service returned an error for object {0}'.format(
        getattr(harvest_object, 'id', harvest_object))
    if response.status_code and response.content:
        msg += ': [{0}] {1}'.format(response.status_code, response.content)
    elif getattr(response, 'error', None):
        # `error` is not present on every requests Response object, hence
        # the guarded lookup above.
        msg += ': {0}'.format(response.error)
    self._save_object_error(msg, harvest_object, 'Import')
    return None