Пример #1
0
def record_existing_unique_identifier(package_id, identifier):
    """
    Record an already existing DOI against a package in the local database.

    The identifier is only saved when it is not yet present in the local DOI
    table and DataCite returns a non-empty response for it. A DOI row that
    already points at the package is removed first, so the package ends up
    with the supplied identifier only.

    :param package_id: string, id of the package the DOI should point to
    :param identifier: string, the DOI to look up and record
    :return: the newly saved DOI object, or None when the identifier is
        already in the local table, DataCite returns an empty response for
        it, or the DataCite lookup raises HTTPError
    """
    datacite_api = DOIDataCiteAPI()

    # Check this identifier doesn't exist in the table
    if Session.query(DOI).filter(DOI.identifier == identifier).first():
        return None

    # And check against the datacite service; only the remote call is guarded
    # so that database errors are not hidden by the HTTPError handler
    try:
        datacite_doi = datacite_api.get(identifier)
    except HTTPError:
        # A failed lookup is handled like an unknown identifier
        return None
    if not datacite_doi.text:
        return None

    # Delete a doi that points to the current dataset before recording the
    # new one (the original statement here was a no-op expression)
    doi_for_this_pkg = Session.query(DOI).filter(DOI.package_id == package_id).first()
    if doi_for_this_pkg:
        Session.delete(doi_for_this_pkg)

    doi = DOI(package_id=package_id, identifier=identifier)
    Session.add(doi)
    Session.commit()
    return doi
Пример #2
0
def create_unique_identifier(package_id):
    """
    Create and save a new unique identifier for a package.

    The identifier is the prefix joined with a zero-padded random number,
    for example 10.5072/0044634. Candidates are drawn until one is found
    that is neither in the local DOI table nor known to DataCite.

    :param package_id: id of the package the new DOI belongs to
    :return: the saved DOI object
    """
    # posixpath always joins with "/", which is what a DOI needs; os.path
    # would use the platform separator instead
    import posixpath

    datacite_api = DOIDataCiteAPI()

    while True:
        identifier = posixpath.join(
            get_prefix(), '{0:07}'.format(random.randint(1, 100000)))

        # Check this identifier doesn't exist in the table; if it does, draw
        # again instead of falling through and saving a duplicate
        if Session.query(DOI).filter(DOI.identifier == identifier).count():
            continue

        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
        except HTTPError:
            # A failed lookup is taken to mean the identifier is free
            pass
        else:
            if datacite_doi.text:
                continue

        doi = DOI(package_id=package_id, identifier=identifier)
        Session.add(doi)
        Session.commit()

        return doi
Пример #3
0
def check_existing_doi(key, flattened_data, errors, context):
    """
    Check that the DOI supplied for a package may be used by that package.

    The DOI is read from ``flattened_data[key]`` and the package id from
    ``flattened_data[('id',)]``. A DOI that is unknown locally must exist in
    DataCite; a DOI that is known locally must belong to this package.

    :param key: key of the DOI value inside flattened_data
    :param flattened_data: dict holding the values being validated
    :param errors: not used by this function
    :param context: not used by this function
    :raises Invalid: if the DOI is not in the local table and DataCite
        returns an empty response or HTTPError, or if the DOI is in the
        local table but belongs to a different package
    :return: None
    """
    datacite_api = DOIDataCiteAPI()
    identifier = flattened_data[key]
    package_id = flattened_data[('id',)]

    existing_doi = Session.query(DOI).filter(DOI.identifier == identifier).first()
    if existing_doi:
        if existing_doi.package_id != package_id:
            message = 'This DOI already exists and belongs to %s' % existing_doi.package_id
            log.error(message)
            raise Invalid(message)
        return

    # Not known locally, so check against the datacite service
    try:
        datacite_doi = datacite_api.get(identifier)
    except HTTPError:
        raise Invalid("DOI %s does not exist in Datacite" % identifier)
    if not datacite_doi.text:
        raise Invalid("DOI %s does not exist in Datacite" % identifier)
Пример #4
0
def create_unique_identifier(package_id):
    """
    Create and save a new unique identifier for a package.

    The identifier is the prefix joined with a zero-padded random number,
    for example 10.5072/0044634. Candidates are drawn until one is found
    that is neither in the local DOI table nor known to DataCite.

    :param package_id: id of the package the new DOI belongs to
    :return: the saved DOI object
    """
    # posixpath always joins with "/", which is what a DOI needs; os.path
    # would use the platform separator instead
    import posixpath

    datacite_api = DOIDataCiteAPI()

    while True:
        suffix = '{0:07}'.format(random.randint(1, 100000))
        identifier = posixpath.join(get_prefix(), suffix)

        # Already in the local table: draw again rather than fall through
        # and save a duplicate identifier
        already_stored = Session.query(DOI).filter(DOI.identifier == identifier).count()
        if already_stored:
            continue

        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
        except HTTPError:
            # A failed lookup is taken to mean the identifier is free
            pass
        else:
            if datacite_doi.text:
                continue

        doi = DOI(package_id=package_id, identifier=identifier)
        Session.add(doi)
        Session.commit()

        return doi
Пример #5
0
def get_or_create_doi(package_id):
    '''Create or retrieve the unique identifier for this package_id.

    When get_doi returns a DOI for the package it is returned unchanged.
    Otherwise random identifiers are drawn until one is found that is
    neither in the local DOI table nor known to DataCite, and that one is
    saved and returned.

    :param package_id: id of the package
    :return: the existing or newly saved DOI object

    '''
    # posixpath always joins with "/", which is what a DOI needs; os.path
    # would use the platform separator instead
    import posixpath

    datacite_api = DOIDataCiteAPI()

    doi = get_doi(package_id)
    if doi is not None:
        return doi

    while True:
        identifier = posixpath.join(
            get_prefix(), u'{0:07}'.format(random.randint(1, 100000)))

        # Check this identifier doesn't exist in the table
        if Session.query(DOI).filter(DOI.identifier == identifier).count():
            continue

        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
        except HTTPError:
            # A failed lookup is taken to mean the identifier is free
            pass
        else:
            if datacite_doi.text:
                continue

        doi = DOI(package_id=package_id, identifier=identifier)
        Session.add(doi)
        Session.commit()
        return doi
Пример #6
0
def publish_doi(package_id, **kwargs):
    """
    Publish a DOI to DataCite

    Need to create metadata first
    And then create DOI => URI association
    See MetadataDataCiteAPI.metadata_to_xml for param information
    @param package_id: id of the package; used to build the dataset URL
    @param kwargs: passed unchanged to MetadataDataCiteAPI.upsert; the
        'identifier' entry is read as the DOI to publish
    @raise AssertionError: if DataCite does not answer with status 201, or
        if the local DOI row could not be marked as published
    @return: None
    """
    # posixpath always joins with "/", which is what a URL needs; os.path
    # would use the platform separator instead
    import posixpath

    identifier = kwargs.get('identifier')

    metadata = MetadataDataCiteAPI()
    metadata.upsert(**kwargs)

    # The ID of a dataset never changes, so use that for the URL
    url = posixpath.join(get_site_url(), 'dataset', package_id)

    doi = DOIDataCiteAPI()
    r = doi.upsert(doi=identifier, url=url)
    # Raised explicitly instead of using `assert`, so the check is still
    # performed when Python runs with -O; the exception type is unchanged
    if r.status_code != 201:
        raise AssertionError('Operation failed ERROR CODE: %s' % r.status_code)

    # If we have created the DOI, save it to the database
    if r.text == 'OK':
        # Update status for this package and identifier
        num_affected = Session.query(DOI).filter_by(
            package_id=package_id, identifier=identifier).update(
                {"published": datetime.datetime.now()})
        # Raise an error if update has failed - should never happen unless
        # DataCite and local db get out of sync - in which case requires investigating
        if num_affected != 1:
            raise AssertionError('Updating local DOI failed')

    log.debug('Created new DOI for package %s', package_id)
Пример #7
0
def publish_doi(package_id, **kwargs):
    """
    Publish a DOI to DataCite

    Need to create metadata first
    And then create DOI => URI association
    See MetadataDataCiteAPI.metadata_to_xml for param information
    @param package_id: id of the package; used to build the dataset URL
    @param kwargs: passed unchanged to MetadataDataCiteAPI.upsert; the
        'identifier' entry is read as the DOI to publish
    @raise AssertionError: if DataCite does not answer with status 201, or
        if the local DOI row could not be marked as published
    @return: None
    """
    # posixpath always joins with "/", which is what a URL needs; os.path
    # would use the platform separator instead
    import posixpath

    identifier = kwargs.get('identifier')

    metadata = MetadataDataCiteAPI()
    metadata.upsert(**kwargs)

    # The ID of a dataset never changes, so use that for the URL
    url = posixpath.join(get_site_url(), 'dataset', package_id)

    doi = DOIDataCiteAPI()
    r = doi.upsert(doi=identifier, url=url)
    # Raised explicitly instead of using `assert`, so the check is still
    # performed when Python runs with -O; the exception type is unchanged
    if r.status_code != 201:
        raise AssertionError('Operation failed ERROR CODE: %s' % r.status_code)

    # If we have created the DOI, save it to the database
    if r.text == 'OK':
        # Update status for this package and identifier
        matching_rows = Session.query(DOI).filter_by(
            package_id=package_id, identifier=identifier)
        num_affected = matching_rows.update(
            {"published": datetime.datetime.now()})
        # Raise an error if update has failed - should never happen unless
        # DataCite and local db get out of sync - in which case requires investigating
        if num_affected != 1:
            raise AssertionError('Updating local DOI failed')

    log.debug('Created new DOI for package %s', package_id)