def test_get_record(self):
        """GetRecord for a newly created dataset must give a valid, non-error OAI-PMH response."""
        created_dataset = factories.Dataset(name='dataset_test_api_export', author='Test Plugin')
        repository = OAIPMHRepository()

        # Translate the dataset's id field into the identifier used by the OAI-PMH interface.
        dataset_key = created_dataset.get(repository.id_field)
        oaipmh_identifier = repository.record_access._get_oaipmh_id(dataset_key)

        request_args = {'identifier': oaipmh_identifier, 'metadataPrefix': 'oai_dc'}
        response_xml = repository.handle_request('GetRecord', request_args, 'REQUEST_URL')

        oai_pmh_format = MetadataFormats().get_metadata_formats('oai_pmh')[0]
        response_record = XMLRecord(oai_pmh_format, response_xml)

        # validate the XML: it has to pass validation and must not be an error response
        assert_true(repository._is_valid_oai_pmh_record(response_record.get_xml_dict()))
        assert_false(repository._is_error_oai_pmh_record(response_record.get_xml_dict()))
    def test_list_identifiers(self):
        """ListIdentifiers with several datasets present must give a valid, non-error OAI-PMH response."""
        # The datasets only have to exist; the created objects themselves are not used.
        for dataset_name in ('dataset_test_api_export_01',
                             'bad_dataset_test_api_export',
                             'dataset_test_api_export_02'):
            factories.Dataset(name=dataset_name, author='Test Plugin')
        repository = OAIPMHRepository()

        request_args = {'metadataPrefix': 'oai_dc'}
        response_xml = repository.handle_request('ListIdentifiers', request_args, 'REQUEST_URL')

        oai_pmh_format = MetadataFormats().get_metadata_formats('oai_pmh')[0]
        response_record = XMLRecord(oai_pmh_format, response_xml)

        # validate the XML: it has to pass validation and must not be an error response
        assert_true(repository._is_valid_oai_pmh_record(response_record.get_xml_dict()))
        assert_false(repository._is_error_oai_pmh_record(response_record.get_xml_dict()))
    def convert(self, record):
        """Convert a dataset record into an OAI Dublin Core (oai_dc) XML record.

        :param record: record accepted by ``can_convert``; its JSON dict
            (``get_json_dict``) supplies the dataset fields.
        :returns: ``XMLRecord`` built from the serialised ``oai_dc:dc`` document.
        :raises TypeError: if ``can_convert(record)`` is false.
        """
        if not self.can_convert(record):
            raise TypeError(('Converter is not compatible with the record format {record_format}({record_version}). ' +
                             'Accepted format is CKAN {input_format}.').format(
                                 record_format=record.get_metadata_format().get_format_name(), record_version=record.get_metadata_format().get_version(),
                                 input_format=self.get_input_format().get_format_name()))

        dataset_dict = record.get_json_dict()

        # Keys starting with '@' are the attributes of the root element;
        # insertion order is the order in which they are serialised.
        dc_element = collections.OrderedDict()
        dc_element['@xmlns:oai_dc'] = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
        dc_element['@xmlns:dc'] = 'http://purl.org/dc/elements/1.1/'
        dc_element['@xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance'
        dc_element['@xsi:schemaLocation'] = 'http://www.openarchives.org/OAI/2.0/oai_dc/  http://www.openarchives.org/OAI/2.0/oai_dc.xsd'

        # NOTE(review): only the dataset name is emitted as dc:identifier. An
        # assignment of the dataset id to the same key was overwritten right
        # away and has been dropped as a dead store -- confirm whether the id
        # should be emitted as an additional identifier.
        dc_element['dc:identifier'] = dataset_dict.get('name', '')
        dc_element['dc:creator'] = dataset_dict.get('author', '')
        # Only the part before the first '-' of the modification timestamp is kept.
        dc_element['dc:date'] = dataset_dict.get('metadata_modified', '2017').split('-')[0]
        dc_element['dc:title'] = dataset_dict.get('title', '')
        dc_element['dc:type'] = 'Dataset'

        oai_dc_dict = collections.OrderedDict()
        oai_dc_dict['oai_dc:dc'] = dc_element

        converted_record = Record(self.output_format, unparse(oai_dc_dict, pretty=True))
        return XMLRecord.from_record(converted_record)
    def _is_valid_oai_pmh_record(self, xmldict, metadata_prefix=''):
        """Validate an OAI-PMH response against the OAI-PMH schema and its payload format schema.

        :param xmldict: dict form of the OAI-PMH response; serialised with ``unparse``.
        :param metadata_prefix: name of the payload metadata format; an empty
            value falls back to ``'oai_dc'``.
        :returns: the result of ``XMLRecord.validate`` against a wrapper schema
            importing both XSDs, or ``False`` if any step raised an exception.
        """
        site_url = config.get('ckan.site_url', '')

        if not metadata_prefix:
            metadata_prefix = 'oai_dc'
        try:
            xml_record = unparse(xmldict)

            oai_pmh_record = XMLRecord(
                MetadataFormats().get_metadata_formats('oai_pmh')[0],
                xml_record)

            # get the format
            metadata_format = MetadataFormats().get_metadata_formats(
                metadata_prefix)[0]
            metadata_schema = metadata_format.get_xsd_url()

            # local xsd for gcmd_dif (nasa hosted is not always available)
            if metadata_prefix == 'gcmd_dif':
                metadata_schema = metadata_schema.replace(
                    'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',
                    site_url + '/package_converter_xsd/')

            # modify xsd due to library bug
            fixed_xsd = '''<xs:schema xmlns="http://www.openarchives.org/OAI/2.0/"
                                  xmlns:xs="http://www.w3.org/2001/XMLSchema"
                                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                                  xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" >
                           <xs:import namespace="http://www.openarchives.org/OAI/2.0/" schemaLocation="http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd" />
                           <xs:import namespace="{namespace}" schemaLocation="{schema}" />
                       </xs:schema>'''.format(
                namespace=metadata_format.get_namespace(),
                schema=metadata_schema)

            return oai_pmh_record.validate(custom_xsd=fixed_xsd)

        except Exception:
            # Any failure counts as "not valid". log.exception records the
            # traceback through the logger instead of printing to stdout.
            # Interpreter-level exceptions (KeyboardInterrupt, SystemExit) are
            # deliberately left to propagate.
            log.exception('Failed to validate OAI-PMH for format {0}'.format(
                metadata_prefix))
            return False
# Example #5
# 0
    def _export_dataset(self, ckan_id, oai_identifier, datestamp, format, state='active', entity='package'):

        if state != 'active':
            return (self._envelop_record(oai_identifier, datestamp, {}, state))

        # Convert record
        try:
            converted_record = export_as_record(ckan_id, format, type=entity)
            record = XMLRecord.from_record(converted_record)

        except Exception as e:
            log.exception(e)
            record = None
        if not record:
            raise oaipmh_error.CannotDisseminateFormatError()

        return self._envelop_record(oai_identifier, datestamp, record.get_xml_dict(), state)
# Example #6
# 0
    def publish(self, doi, pkg=None, context=None, *args, **kwargs):
        """Publish the DataCite metadata of a dataset for ``doi``, or update it.

        The dataset is exported in ``datacite`` format via the
        ``package_export`` action, validated, base64-encoded and sent to
        ``self.datacite_url`` as a JSON:API ``dois`` document (POST to publish,
        PUT to ``<datacite_url>/<doi>`` to update).

        :param doi: DOI to publish or update.
        :param pkg: dataset dict; ``id`` is required, ``name`` is used for the
            landing URL when present.
        :param context: context dict passed to the ``package_export`` action;
            a fresh dict is created when omitted.
        :param kwargs: ``update`` (default ``False``) selects update instead of
            publish.
        :returns: ``(published_doi, None)`` on HTTP 200/201, otherwise
            ``(None, error_message)``.
        """
        # A ``{}`` default would be one dict shared by every call, and the
        # dict is passed on to an action.
        if context is None:
            context = {}

        # ``pkg`` defaults to None; report that like any other missing dataset
        # instead of failing on ``pkg['id']``.
        if not pkg:
            return None, 'Dataset not found'

        update_doi = kwargs.get('update', False)

        # dataset data
        package_id = pkg['id']
        dataset_name = pkg.get('name', package_id)
        if self.url_prefix:
            url = self.url_prefix + dataset_name
        else:
            url = config.get('ckan.site_url', '') + '/dataset/' + dataset_name

        if update_doi:
            log.debug("*** Updating id = {0}, url = {1}".format(
                package_id, url))
            # check published data match
            published_ids = self.get_doi_identifiers(doi)
            if published_ids and package_id not in published_ids and pkg.get(
                    'name') not in published_ids:
                return None, 'Dataset id ({0}, {1}) do not match published ids: [{2}]'.format(
                    package_id, pkg.get('name'), ', '.join(published_ids))
        else:
            log.debug("Publishing id = {0}, url = {1}".format(package_id, url))

        # get converted package
        metadata_format = 'datacite'

        try:
            converted_package = toolkit.get_action('package_export')(
                context, {
                    'id': package_id,
                    'format': metadata_format
                })
        except toolkit.ObjectNotFound:
            return None, 'Dataset not found'

        xml = converted_package.replace('\n', '').replace('\t', '')

        # Validate
        try:
            converted_record = XMLRecord.from_record(
                Record(
                    MetadataFormats().get_metadata_formats(metadata_format)[0],
                    xml))
            validation_result = converted_record.validate()
            log.debug("Validation result: {0}".format(validation_result))
        except Exception as e:
            # log.exception keeps the traceback in the log rather than on stderr
            log.exception(
                "Converted Validation FAILED, exception: {0}".format(e))
            validation_result = False

        if not validation_result:
            return None, 'Dataset XML validation failed'

        # encode 64
        xml_bytes = xml.encode('utf-8') if isinstance(xml, str) else xml
        xml_encoded = base64.b64encode(xml_bytes)

        # prepare JSON
        headers = {"Content-Type": "application/vnd.api+json"}
        auth = HTTPBasicAuth(self.account_name, self.account_password)

        attributes = collections.OrderedDict()
        # TODO check for update if this state is correct
        attributes['event'] = "" if update_doi else "publish"
        attributes['doi'] = doi
        attributes['url'] = url
        attributes['xml'] = xml_encoded.decode()

        data = collections.OrderedDict()
        data['id'] = doi
        data['type'] = 'dois'
        data['attributes'] = attributes

        # Named ``payload`` so the ``*args`` parameter is not shadowed.
        payload = {'data': data}
        args_json = json.dumps(payload)

        if update_doi:
            datacite_url_endpoint = self.datacite_url + '/' + doi
            send_request = requests.put
        else:
            datacite_url_endpoint = self.datacite_url
            send_request = requests.post
        log.debug(
            " REST request send to URL: {0}".format(datacite_url_endpoint))

        r = send_request(datacite_url_endpoint,
                         headers=headers,
                         auth=auth,
                         data=args_json)

        if r.status_code in (200, 201):
            published_doi = r.json().get('data').get('id')
            return published_doi, None

        # An error response is not guaranteed to be JSON; fall back to the raw
        # body so the HTTP error is reported instead of a decoding exception.
        try:
            error_body = r.json()
        except ValueError:
            error_body = r.text

        if update_doi:
            return None, 'Error updating to DataCite: HTTP Code: {0}, error: {1}'.format(
                r.status_code, error_body)
        return None, 'Error publishing to DataCite: HTTP Code: {0}, error: {1}'.format(
            r.status_code, error_body)
 def test_list_metadata_formats(self):
     """ListMetadataFormats must give a response that validates and is not an error record."""
     response_xml = OAIPMHRepository().handle_request('ListMetadataFormats', {}, 'REQUEST_URL')
     oai_pmh_format = MetadataFormats().get_metadata_formats('oai_pmh')[0]
     response_record = XMLRecord(oai_pmh_format, response_xml)

     # validate the XML
     assert_true(response_record.validate())

     # a second repository instance is used only for the error check
     error_checker = OAIPMHRepository()
     assert_false(error_checker._is_error_oai_pmh_record(response_record.get_xml_dict()))