def _parse_results_set_info(self):
    '''pull the paging counts and record schema from the SearchResults element'''
    result_elem = extract_elem(self.parser.xml, ['SearchResults'])
    self.total = extract_attrib(result_elem, ['@numberOfRecordsMatched'])
    self.subtotal = extract_attrib(result_elem, ['@numberOfRecordsReturned'])
    self.schema = extract_attrib(result_elem, ['@recordSchema'])
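# A hedged illustration of the element _parse_results_set_info reads. In a
# CSW 2.0.2 GetRecords response the paging counts live on csw:SearchResults,
# e.g. (sample values only):
#
#   <csw:SearchResults numberOfRecordsMatched="120"
#                      numberOfRecordsReturned="10"
#                      recordSchema="http://www.isotc211.org/2005/gmd"/>
#
# so, assuming extract_attrib returns the raw attribute strings, self.total
# would be "120", self.subtotal "10", and self.schema the record schema URI.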
def parse(self):
    output = {}

    urls = set()
    if 'service' in self.identify:
        service = {
            "object_id": generate_uuid_urn(),
            "dcterms:title": extract_attrib(self.parser.xml, ['@name']),
            "rdf:type": "UNIDATA:THREDDS {0}".format(
                extract_attrib(self.parser.xml, ['@version'])),
            "bcube:dateCreated":
                self.harvest_details.get('harvest_date', ''),
            "bcube:lastUpdated":
                self.harvest_details.get('harvest_date', ''),
            "relationships": [],
            "urls": []
        }
        url_sha = generate_sha_urn(self.url)
        urls.add(url_sha)
        original_url = self._generate_harvest_manifest(**{
            "bcube:hasUrlSource": "Harvested",
            "bcube:hasConfidence": "Good",
            "vcard:hasURL": self.url,
            "object_id": url_sha
        })
        service['urls'].append(original_url)
        # relate the service back to the harvested catalog url
        # (linked by the sha-based identifier)
        service['relationships'].append(
            {
                "relate": "bcube:originatedFrom",
                "object_id": url_sha
            }
        )

        # deal with the "dataset"
        service_bases = self.parser.xml.xpath(
            '//*[local-name()="service" and @base != ""]'
        )
        self.service_bases = {
            s.attrib.get('name'): s.attrib.get('base')
            for s in service_bases
        }

        # if 'dataset' in self.identify:
        #     # TODO: this is really not right but it is not
        #     # a proper web service so meh
        #     datasets = self._parse_datasets()

        # if 'metadata' in self.identify:
        #     self.description['metadata'] = self._parse_metadata()

        output['services'] = [service]

    self.description = tidy_dict(output)
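# A minimal usage sketch (assumptions: the enclosing class is the THREDDS
# catalog reader, constructed with the harvested response, its source url,
# and the harvest_details dict; the class name and constructor signature
# below are hypothetical):
#
#   reader = ThreddsReader(response, url, harvest_details)  # hypothetical
#   reader.parse()
#   for svc in reader.description.get('services', []):
#       print(svc['object_id'], svc['dcterms:title'], len(svc['urls']))
#
# After parse() runs, self.description holds a single "services" entry whose
# urls list carries the harvest manifest for the original catalog url and
# whose relationships link it back via bcube:originatedFrom.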
def _parse_keywords(self, elem):
    '''
    for each descriptiveKeywords block in an identification block
    '''
    keywords = []

    for key_elem in extract_elems(elem, ['descriptiveKeywords']):
        # TODO: split these up (if *-delimited in some way)
        terms = extract_items(
            key_elem,
            ['MD_Keywords', 'keyword', 'CharacterString'])
        key_type = extract_attrib(
            key_elem,
            ['MD_Keywords', 'type', 'MD_KeywordTypeCode', '@codeListValue'])
        thesaurus = extract_item(
            key_elem,
            ['MD_Keywords', 'thesaurusName', 'CI_Citation',
             'title', 'CharacterString'])

        if terms:
            keywords.append(
                tidy_dict({
                    "object_id": generate_uuid_urn(),
                    "dc:partOf": thesaurus,
                    "bcube:hasType": key_type,
                    "bcube:hasValue": terms
                })
            )

    # TODO: add the Anchor element handling
    # ['descriptiveKeywords', 'MD_Keywords', 'keyword', 'Anchor']

    # add a generic set for the iso topic category
    isotopics = extract_items(
        elem, ['topicCategory', 'MD_TopicCategoryCode'])
    if isotopics:
        keywords.append(
            tidy_dict({
                "object_id": generate_uuid_urn(),
                "dc:partOf": 'IsoTopicCategories',
                "bcube:hasValue": isotopics
            })
        )

    return keywords
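# A sketch of what _parse_keywords returns for one descriptiveKeywords block
# plus an ISO topic category (values are illustrative, not from a real
# record; the keys mirror the tidy_dict calls above, and object_id comes
# from generate_uuid_urn()):
#
#   [
#       {
#           "object_id": "urn:uuid:...",
#           "dc:partOf": "GCMD Science Keywords",
#           "bcube:hasType": "theme",
#           "bcube:hasValue": ["OCEANS", "SEA SURFACE TEMPERATURE"]
#       },
#       {
#           "object_id": "urn:uuid:...",
#           "dc:partOf": "IsoTopicCategories",
#           "bcube:hasValue": ["oceans"]
#       }
#   ]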
def _handle_operations(self):
    elems = extract_elems(
        self.elem, ['containsOperations', 'SV_OperationMetadata'])
    ops = []
    for e in elems:
        op = {}
        op['name'] = extract_item(
            e, ['operationName', 'CharacterString'])
        op['method'] = extract_attrib(
            e, ['DCP', 'DCPList', '@codeListValue'])
        op['url'] = extract_item(
            e, ['connectPoint', 'CI_OnlineResource', 'linkage', 'URL'])
        op['parameters'] = [
            self._handle_parameter(pe)
            for pe in extract_elems(e, ['parameters', 'SV_Parameter'])]
        ops.append(op)
    return ops
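# A sketch of the per-operation dict _handle_operations builds from each ISO
# 19119 SV_OperationMetadata element (values are illustrative only;
# 'parameters' holds whatever _handle_parameter returns for each
# SV_Parameter child):
#
#   {
#       "name": "GetCapabilities",
#       "method": "WebServices",
#       "url": "http://example.com/wms",
#       "parameters": [...]
#   }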