def _parse_results_set_info(self):
        '''Capture result-set counts and record schema from SearchResults.'''
        results = extract_elem(self.parser.xml, ['SearchResults'])

        # attribute names come straight off the SearchResults element
        self.schema = extract_attrib(results, ['@recordSchema'])
        self.total = extract_attrib(results, ['@numberOfRecordsMatched'])
        self.subtotal = extract_attrib(results, ['@numberOfRecordsReturned'])
    def parse(self):
        '''
        Build the description dict for this THREDDS catalog response and
        store it on self.description.

        Side effects:
            self.service_bases: {service name: base url} for every
                service element carrying a non-empty ``base`` attribute.
            self.description: tidied dict containing a "services" list
                when 'service' is in self.identify.
        '''
        output = {}
        urls = set()
        service = None  # fix: was left unbound when 'service' not identified

        if 'service' in self.identify:
            service = {
                "object_id": generate_uuid_urn(),
                "dcterms:title": extract_attrib(self.parser.xml, ['@name']),
                "rdf:type": "UNIDATA:THREDDS {0}".format(
                    extract_attrib(self.parser.xml, ['@version'])),
                "bcube:dateCreated":
                    self.harvest_details.get('harvest_date', ''),
                "bcube:lastUpdated":
                    self.harvest_details.get('harvest_date', ''),
                "relationships": [],
                "urls": []
            }
            url_sha = generate_sha_urn(self.url)
            urls.add(url_sha)
            original_url = self._generate_harvest_manifest(**{
                "bcube:hasUrlSource": "Harvested",
                "bcube:hasConfidence": "Good",
                "vcard:hasURL": self.url,
                "object_id": url_sha
            })
            service['urls'].append(original_url)
            # NOTE: this is not the sha from the url
            service['relationships'].append(
                {
                    "relate": "bcube:originatedFrom",
                    "object_id": url_sha
                }
            )

        # map each named service element to its base url so dataset access
        # paths can be resolved against it later
        service_bases = self.parser.xml.xpath(
            '//*[local-name()="service" and @base != ""]'
        )
        self.service_bases = {
            s.attrib.get('name'): s.attrib.get('base') for s in service_bases
        }

        # TODO: handle the 'dataset' identification (self._parse_datasets())
        #       and 'metadata' (self._parse_metadata()); neither is wired
        #       up yet.

        # BUG FIX: `service` was referenced unconditionally here, raising
        # NameError whenever 'service' was not in self.identify.
        if service is not None:
            output['services'] = [service]
        self.description = tidy_dict(output)
    def _parse_keywords(self, elem):
        '''
        Gather keyword groupings from each descriptiveKeywords block in an
        identification block, appending a generic grouping for any ISO
        topic categories found.
        '''
        results = []

        for kw_block in extract_elems(elem, ['descriptiveKeywords']):
            # TODO: split these up (if *-delimited in some way)
            term_list = extract_items(
                kw_block, ['MD_Keywords', 'keyword', 'CharacterString'])
            term_type = extract_attrib(
                kw_block,
                ['MD_Keywords', 'type', 'MD_KeywordTypeCode',
                 '@codeListValue'])
            vocab_title = extract_item(
                kw_block,
                ['MD_Keywords', 'thesaurusName', 'CI_Citation', 'title',
                 'CharacterString'])

            if term_list:
                entry = {
                    "object_id": generate_uuid_urn(),
                    "dc:partOf": vocab_title,
                    "bcube:hasType": term_type,
                    "bcube:hasValue": term_list
                }
                results.append(tidy_dict(entry))

        # TODO: add the Anchor element handling
        #       ['descriptiveKeywords', 'MD_Keywords', 'keyword', 'Anchor']

        # one generic grouping for the iso topic category codes
        topic_codes = extract_items(
            elem, ['topicCategory', 'MD_TopicCategoryCode'])
        if topic_codes:
            results.append(tidy_dict({
                "object_id": generate_uuid_urn(),
                "dc:partOf": 'IsoTopicCategories',
                "bcube:hasValue": topic_codes
            }))

        return results
# Example #4
    def _handle_operations(self):
        '''Build one {name, method, url, parameters} dict per operation
        found under containsOperations/SV_OperationMetadata.'''
        operations = []
        for op_elem in extract_elems(
                self.elem, ['containsOperations', 'SV_OperationMetadata']):
            operations.append({
                'name': extract_item(
                    op_elem, ['operationName', 'CharacterString']),
                'method': extract_attrib(
                    op_elem, ['DCP', 'DCPList', '@codeListValue']),
                'url': extract_item(
                    op_elem,
                    ['connectPoint', 'CI_OnlineResource', 'linkage', 'URL']),
                'parameters': [
                    self._handle_parameter(param_elem)
                    for param_elem in extract_elems(
                        op_elem, ['parameters', 'SV_Parameter'])
                ]
            })

        return operations
# Example #5
    def _parse_keywords(self, elem):
        '''
        Return keyword descriptors for an identification block: one entry
        per descriptiveKeywords group plus, when present, a synthetic
        group for the ISO topic categories.
        '''
        # xpath-style paths used for each descriptiveKeywords group
        terms_path = ['MD_Keywords', 'keyword', 'CharacterString']
        type_path = [
            'MD_Keywords', 'type', 'MD_KeywordTypeCode', '@codeListValue']
        thesaurus_path = [
            'MD_Keywords', 'thesaurusName', 'CI_Citation', 'title',
            'CharacterString']

        collected = []

        for group in extract_elems(elem, ['descriptiveKeywords']):
            # TODO: split these up (if *-delimited in some way)
            values = extract_items(group, terms_path)
            code = extract_attrib(group, type_path)
            vocab = extract_item(group, thesaurus_path)

            if values:
                collected.append(tidy_dict({
                    "object_id": generate_uuid_urn(),
                    "dc:partOf": vocab,
                    "bcube:hasType": code,
                    "bcube:hasValue": values
                }))

        # TODO: add the Anchor element handling
        #       ['descriptiveKeywords', 'MD_Keywords', 'keyword', 'Anchor']

        # add a generic set for the iso topic category
        topics = extract_items(
            elem, ['topicCategory', 'MD_TopicCategoryCode'])
        if topics:
            collected.append(tidy_dict({
                "object_id": generate_uuid_urn(),
                "dc:partOf": 'IsoTopicCategories',
                "bcube:hasValue": topics
            }))

        return collected
    def _handle_operations(self):
        '''Collect name/method/url/parameters for each SV_OperationMetadata
        element under containsOperations.'''
        found = []
        op_elems = extract_elems(
            self.elem, ['containsOperations', 'SV_OperationMetadata'])

        for oe in op_elems:
            name = extract_item(oe, ['operationName', 'CharacterString'])
            method = extract_attrib(oe, ['DCP', 'DCPList', '@codeListValue'])
            url = extract_item(
                oe, ['connectPoint', 'CI_OnlineResource', 'linkage', 'URL'])
            params = [
                self._handle_parameter(pe)
                for pe in extract_elems(oe, ['parameters', 'SV_Parameter'])
            ]
            found.append({
                'name': name,
                'method': method,
                'url': url,
                'parameters': params
            })

        return found
    def _parse_results_set_info(self):
        '''Record the matched/returned counts and the record schema taken
        from the SearchResults element.'''
        search_results = extract_elem(self.parser.xml, ['SearchResults'])

        self.total = extract_attrib(
            search_results, ['@numberOfRecordsMatched'])
        self.subtotal = extract_attrib(
            search_results, ['@numberOfRecordsReturned'])
        self.schema = extract_attrib(search_results, ['@recordSchema'])