Exemplo n.º 1
0
def update(recid):
    """Update the DataCite metadata of the record identified by ``recid``.

    The record is resolved through its ``recid`` persistent identifier,
    serialized with ``DataCiteSerializer``, converted to a schema 4.0 XML
    document and handed to the DOI provider together with the landing-page
    URL. The database session is committed afterwards.

    :param recid: Record identifier; also used as the last segment of the
        landing-page URL.
    :raises ClickException: If no ``doi`` persistent identifier exists for
        the record's DOI.
    """
    uuid = PersistentIdentifier.get('recid', recid).object_uuid
    record = Record.get_record(uuid)
    doi = record['doi']

    try:
        provider = DataCiteProviderWrapper.get(pid_value=doi,
                                               pid_type='doi')
    except PIDDoesNotExistError:
        raise ClickException('Record with DOI {} not registered in DataCite.'
                             .format(doi))

    # serialize record to schema40
    doc = DataCiteSerializer().dump(record).data
    # NOTE(review): the boolean returned by validate() is not checked here,
    # so an invalid document is still sent to the provider.
    schema40.validate(doc)
    doc = schema40.tostring(doc)

    # Build the URL with plain string formatting. os.path.join() is meant for
    # filesystem paths: it uses the OS separator, rejects non-string
    # components and drops the base when the last part starts with "/".
    base_url = current_app.config.get('PIDSTORE_LANDING_BASE_URL')
    landing_page = '{}/{}'.format(base_url.rstrip('/'), recid)

    provider.update(url=landing_page,
                    doc=doc)
    db.session.commit()

    click.echo('Record with DOI {} updated in DataCite'.format(doi))
Exemplo n.º 2
0
def register(recid):
    """Register the record identified by ``recid`` in DataCite.

    The record is resolved through its ``recid`` persistent identifier. If
    no ``doi`` persistent identifier exists yet for the record's DOI, one is
    created. The record is then serialized to a schema 4.0 XML document and
    registered with its landing-page URL, and the database session is
    committed.

    :param recid: Record identifier; also used as the last segment of the
        landing-page URL.
    """
    uuid = PersistentIdentifier.get('recid', recid).object_uuid
    record = Record.get_record(uuid)
    experiment = record.get('experiment', None)
    doi = record['doi']

    try:
        provider = DataCiteProviderWrapper.get(pid_value=doi,
                                               pid_type='doi')
    except PIDDoesNotExistError:
        provider = DataCiteProviderWrapper.create(pid_value=doi,
                                                  experiment=experiment)

    # serialize record to schema40
    doc = DataCiteSerializer().dump(record).data
    # NOTE(review): the boolean returned by validate() is not checked here,
    # so an invalid document is still sent to the provider.
    schema40.validate(doc)
    doc = schema40.tostring(doc)

    # Build the URL with plain string formatting. os.path.join() is meant for
    # filesystem paths: it uses the OS separator, rejects non-string
    # components and drops the base when the last part starts with "/".
    base_url = current_app.config.get('PIDSTORE_LANDING_BASE_URL')
    landing_page = '{}/{}'.format(base_url.rstrip('/'), recid)

    provider.register(url=landing_page,
                      doc=doc)
    db.session.commit()

    click.echo('Record registered with DOI {}'.format(doi))
Exemplo n.º 3
0
def register(uuid):
    """Register the record stored under ``uuid`` in DataCite.

    Looks up the ``doi`` persistent identifier for the record's DOI, creating
    it when it does not exist, then registers the schema 4.0 XML document
    together with a landing-page URL built from the configured base URL and
    the DOI. The database session is committed at the end.

    :param uuid: Identifier passed to ``Record.get_record``.
    """
    record = Record.get_record(uuid)
    experiment = record.get('experiment')
    doi = record['doi']

    try:
        provider = DataCiteProviderWrapper.get(pid_type='doi', pid_value=doi)
    except PIDDoesNotExistError:
        provider = DataCiteProviderWrapper.create(experiment=experiment,
                                                  pid_value=doi)

    # Serialize the record, run the schema check (result is not inspected),
    # and render the XML document.
    serialized = DataCiteSerializer().dump(record).data
    schema40.validate(serialized)
    xml_doc = schema40.tostring(serialized)

    base_url = current_app.config.get('PIDSTORE_LANDING_BASE_URL')
    landing_page = '{}/{}'.format(base_url, doi)

    provider.register(doc=xml_doc, url=landing_page)
    db.session.commit()

    message = 'Record registered with DOI {}'.format(doi)
    click.echo(message)
def update_record(idv, username, password, datacite_password):
    """Convert one eprint record to DataCite metadata and post it.

    The eprint XML is fetched by running ``eputil`` on the REST URL of the
    record, converted with ``caltech_thesis.epxml_to_datacite``, validated
    against the DataCite 4.0 schema and posted through the MDS client. The
    client's response is printed.

    :param idv: Eprint identifier; converted with ``str`` to build the URL.
    :param username: User name embedded in the REST URL.
    :param password: Password embedded in the REST URL.
    :param datacite_password: Password for the DataCite MDS client.
    :raises AssertionError: If the metadata does not validate.
    """
    url = 'https://' + username + ':' + password + '@authors.library.caltech.edu/rest/eprint/'
    record_url = url + str(idv) + '.xml'
    record = subprocess.check_output(["eputil", record_url],
                                     universal_newlines=True)
    eprint = xmltodict.parse(record)['eprints']['eprint']
    metadata = caltech_thesis.epxml_to_datacite(eprint)

    if not schema40.validate(metadata):
        # Report every schema violation before aborting.
        errors = sorted(schema40.validator.iter_errors(metadata),
                        key=lambda e: e.path)
        for error in errors:
            print(error.message)
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError('DataCite metadata failed schema validation')

    # Initialize the MDS client.
    d = DataCiteMDSClient(
        username='******',
        password=datacite_password,
        prefix='10.7907',
    )

    xml = schema40.tostring(metadata)
    result = d.metadata_post(xml)
    print(result)
Exemplo n.º 5
0
def update_doi(doi,metadata,url=''):
    """Post updated metadata (and optionally a new URL) for an existing DOI.

    The ``identifier`` entry of ``metadata`` is overwritten in place so that
    it always matches ``doi``. The metadata is validated against the DataCite
    4.0 schema, posted through the MDS client, and the response is printed.

    :param doi: DOI to update; the part before the first ``/`` is used as the
        client prefix.
    :param metadata: DataCite metadata dictionary (modified in place).
    :param url: When non-empty, the DOI is also pointed at this URL.
    :raises KeyError: If the ``DATACITE`` environment variable is not set.
    :raises SystemExit: If the metadata does not validate.
    """
    password = os.environ['DATACITE']
    prefix = doi.split('/')[0]
    #Ensure metadata identifier matches that given in function
    metadata['identifier'] = {'identifier':doi,'identifierType':'DOI'}

    # Initialize the MDS client.
    d = DataCiteMDSClient(
        username='******',
        password=password,
        prefix=prefix,
        url='https://mds.datacite.org'
        #test_mode=True
        )

    if not schema40.validate(metadata):
        # Print every schema violation. The validator object itself yields
        # the errors; calling its validate() would raise on the first one.
        errors = sorted(schema40.validator.iter_errors(metadata),
                        key=lambda e: e.path)
        for error in errors:
            print(error.message)
        # Same exception type as exit(), but with a failing status code and
        # without relying on the ``site``-provided builtin.
        raise SystemExit(1)

    xml = schema40.tostring(metadata)

    response = d.metadata_post(xml)
    print(response)
    if url != '':
        response = d.doi_post(doi,url)
        print(response)
Exemplo n.º 6
0
def create_doi(metadata, url):
    """Post metadata to the DataCite test MDS and mint a DOI for ``url``.

    The metadata is validated as supplied, then its ``identifier`` entry is
    replaced in place by the bare prefix before posting. The identifier is
    extracted from the text between the first ``(`` and the following ``)``
    of the response and registered against ``url``. Both responses are
    printed.

    :param metadata: DataCite metadata dictionary (modified in place).
    :param url: URL the new DOI should point to.
    :raises KeyError: If the ``DATACITE`` environment variable is not set.
    :raises SystemExit: If the metadata does not validate.
    """
    password = os.environ['DATACITE']
    prefix = '10.33569'

    # Initialize the MDS client.
    d = DataCiteMDSClient(username='******',
                          password=password,
                          prefix=prefix,
                          url='https://mds.test.datacite.org'
                          #test_mode=True
                          )

    if not schema40.validate(metadata):
        # Print every schema violation. The validator object itself yields
        # the errors; calling its validate() would raise on the first one.
        errors = sorted(schema40.validator.iter_errors(metadata),
                        key=lambda e: e.path)
        for error in errors:
            print(error.message)
        # Same exception type as exit(), but with a failing status code and
        # without relying on the ``site``-provided builtin.
        raise SystemExit(1)

    #Provide only prefix in identifier field to let DataCite generate DOI
    metadata['identifier'] = {'identifier': prefix, 'identifierType': 'DOI'}

    xml = schema40.tostring(metadata)

    response = d.metadata_post(xml)
    print(response)
    # NOTE(review): assumes the response text carries the DOI in
    # parentheses; an IndexError is raised otherwise - confirm the format.
    identifier = response.split('(')[1].split(')')[0]
    response = d.doi_post(identifier, url)
    print(response)
def check_record(idv, username, password):
    """Check one eprint record and report how its series info was derived.

    The eprint XML is fetched by running ``eputil`` on the REST URL of the
    record. The record is then converted with
    ``caltech_authors_tech_report.epxml_to_datacite`` and validated against
    the DataCite 4.0 schema; validation errors are printed but do not stop
    the function.

    :param idv: Eprint identifier; converted with ``str`` to build the URL.
    :param username: User name embedded in the REST URL.
    :param password: Password embedded in the REST URL.
    :returns: An empty list when the record has both ``series_name`` and
        ``number`` or when the conversion fails. Otherwise a four-item list
        ``[name, number, source, official_url]`` where ``source`` is
        ``'other_numbering'``, ``'local_group_resolver'`` or ``'resolver'``.
    """
    url = 'https://' + username + ':' + password + '@authors.library.caltech.edu/rest/eprint/'
    record_url = url + str(idv) + '.xml'
    record = subprocess.check_output(["eputil", record_url],
                                     universal_newlines=True)
    eprint = xmltodict.parse(record)['eprints']['eprint']

    name_and_series_exceptions = []
    if 'other_numbering_system' in eprint:
        items = eprint['other_numbering_system']['item']
        if not isinstance(items, list):
            #Deal with single item listings
            eprint['other_numbering_system']['item'] = [items]

    if 'series_name' in eprint and 'number' in eprint:
        # Regular case: series name and number are both present, so there is
        # no exception to report.
        pass
    elif 'other_numbering_system' in eprint:
        #Assume first is correct
        item = eprint['other_numbering_system']['item'][0]
        name_and_series_exceptions = [
            item['name']['#text'], item['id'], 'other_numbering',
            eprint['official_url']
        ]
    elif 'local_group' in eprint:
        resolver = eprint['official_url'].split(':')
        number = resolver[-1]
        name_and_series_exceptions = [
            eprint['local_group']['item'], number, 'local_group_resolver',
            eprint['official_url']
        ]
    else:
        resolver = eprint['official_url'].split(':')
        name = resolver[1].split('/')[-1]
        number = resolver[-1]
        name_and_series_exceptions = [
            name, number, 'resolver', eprint['official_url']
        ]

    try:
        metadata = caltech_authors_tech_report.epxml_to_datacite(eprint)
    except Exception:
        # Deliberately best-effort: any conversion failure skips the record.
        # ``Exception`` (not a bare except) lets Ctrl-C and SystemExit
        # through.
        print("Incorrect document type- skipping")
        return []

    if not schema40.validate(metadata):
        # Print every schema violation. The validator object itself yields
        # the errors; calling its validate() would raise on the first one.
        errors = sorted(schema40.validator.iter_errors(metadata),
                        key=lambda e: e.path)
        for error in errors:
            print(error.message)

    return name_and_series_exceptions
Exemplo n.º 8
0
def update_datacite_metadata(collection, token, access):
    """Post the metadata of every matching record in ``collection``.

    For each account in ``access`` an MDS client is created, and every key
    of the collection is read. Records that have an ``identifier`` whose DOI
    prefix equals the account prefix are validated against the DataCite 4.0
    schema and posted; the response is printed.

    :param collection: Collection passed to ``dataset.keys`` and
        ``dataset.read``.
    :param token: Unused by this function.
    :param access: Iterable of mappings with ``username``, ``password`` and
        ``prefix`` entries for DataCite.
    :raises SystemExit: If a record cannot be read or does not validate.
    """
    keys = dataset.keys(collection)
    for a in access:

        username = a["username"]
        password = a["password"]
        prefix = a["prefix"]

        # Initialize the MDS client.
        d = DataCiteMDSClient(
            username=username,
            password=password,
            prefix=prefix,
            url="https://mds.datacite.org",
        )

        for k in keys:
            print(k)
            metadata, err = dataset.read(collection, k)
            if err != "":
                print(err)
                # Same exception type as exit(), but with a failing status.
                raise SystemExit(1)
            # Get rid of Key from dataset
            metadata.pop("_Key")

            if "identifier" not in metadata:
                continue
            record_doi = metadata["identifier"]["identifier"]

            # Handle records with 4.3 metadata elements
            metadata.pop("schemaVersion", None)
            metadata.pop("types", None)

            # Only post records that belong to this account's prefix.
            if record_doi.split("/")[0] != prefix:
                continue

            if not schema40.validate(metadata):
                print(metadata)
                # Print every schema violation. The validator object itself
                # yields the errors; its validate() would raise instead.
                errors = sorted(schema40.validator.iter_errors(metadata),
                                key=lambda e: e.path)
                for error in errors:
                    print(error.message)
                raise SystemExit(1)

            xml = schema40.tostring(metadata)

            response = d.metadata_post(xml)
            print(response)
    def build_metadata(self, node):
        """Build the DataCite metadata for ``node`` and return it as XML.

        The metadata always carries the DOI, one creator per visible
        contributor, the title, a fixed publisher, the current year and a
        fixed resource type. A description and a license entry are added
        only when the node has them. The dictionary is asserted to be valid
        before it is rendered with ``schema40.tostring``.
        """
        identifier = {
            'identifier': self.build_doi(node),
            'identifierType': 'DOI',
        }

        creators = []
        for contributor in node.visible_contributors:
            creators.append({
                'creatorName': contributor.fullname,
                'givenName': contributor.given_name,
                'familyName': contributor.family_name,
            })

        data = {
            'identifier': identifier,
            'creators': creators,
            'titles': [{'title': node.title}],
            'publisher': 'Open Science Framework',
            'publicationYear': str(datetime.datetime.now().year),
            'resourceType': {
                'resourceType': 'Project',
                'resourceTypeGeneral': 'Text',
            },
        }

        # Optional sections: only present when the node provides them.
        if node.description:
            abstract = {
                'descriptionType': 'Abstract',
                'description': node.description,
            }
            data['descriptions'] = [abstract]

        if node.node_license:
            rights = {
                'rights': node.node_license.name,
                'rightsURI': node.node_license.url,
            }
            data['rightsList'] = [rights]

        # The dictionary must validate before it is turned into XML.
        assert schema40.validate(data)

        return schema40.tostring(data)
Exemplo n.º 10
0
    def build_metadata(self, node):
        """Return the DataCite metadata of ``node`` rendered as XML.

        Required entries (DOI, creators, title, publisher, publication year,
        resource type) are always set; the description and license entries
        are added only when present on the node. The dictionary is asserted
        to be valid before ``schema40.tostring`` renders it.
        """
        data = {}

        # Required entries, filled in one by one.
        data['identifier'] = {
            'identifier': self.build_doi(node),
            'identifierType': 'DOI',
        }
        data['creators'] = [
            dict(creatorName=person.fullname,
                 givenName=person.given_name,
                 familyName=person.family_name)
            for person in node.visible_contributors
        ]
        data['titles'] = [dict(title=node.title)]
        data['publisher'] = 'Open Science Framework'
        current_year = datetime.datetime.now().year
        data['publicationYear'] = str(current_year)
        data['resourceType'] = dict(resourceType='Project',
                                    resourceTypeGeneral='Text')

        # Optional entries.
        if node.description:
            data['descriptions'] = [
                dict(descriptionType='Abstract',
                     description=node.description)
            ]

        if node.node_license:
            node_license = node.node_license
            data['rightsList'] = [
                dict(rights=node_license.name, rightsURI=node_license.url)
            ]

        # Refuse to render a dictionary that does not validate.
        is_valid = schema40.validate(data)
        assert is_valid

        xml = schema40.tostring(data)
        return xml
Exemplo n.º 11
0
def test_example_json_validates(example_json40):
    """Check that the example document is accepted by the JSON schema."""
    is_valid = validate(example_json40)
    assert is_valid
Exemplo n.º 12
0
import json
from datacite import schema40
from ames import codemeta_to_datacite

# Load the CodeMeta description; the context manager closes the file handle.
with open('codemeta.json', 'r') as infile:
    meta = json.load(infile)

# Convert to DataCite metadata and fill in placeholder values for the
# entries set below so that the result can be validated.
standardized = codemeta_to_datacite(meta)
standardized['identifier'] = {'identifier': '10.1/1', 'identifierType': 'DOI'}
standardized['titles'] = [{'title': "Title"}]
standardized['publisher'] = 'publisher'
standardized['publicationYear'] = '2018'
standardized['resourceType'] = {'resourceTypeGeneral': "Software"}

result = schema40.validate(standardized)
if not result:
    # Print every schema violation. The validator object itself yields the
    # errors; calling its validate() would raise on the first one.
    errors = sorted(schema40.validator.iter_errors(standardized),
                    key=lambda e: e.path)
    for error in errors:
        print(error.message)
    # Same exception type as exit(), but with a failing status code.
    raise SystemExit(1)
else:
    print("Valid DataCite Metadata")
Exemplo n.º 13
0
    line_n = line_n + 1
    line = files['related_identifier'][line_n]

metadata['relatedIdentifiers'] = related

metadata['resourceType'] = {'resourceTypeGeneral':'Dataset'}

metadata['subjects'] = [{'subject':"atmospheric trace gases"},{"subject":"CO2"},
    {"subject": "CH4"},{"subject": "CO"},{"subject": "N2O"},
    {"subject":"column-averaged dry-air mole fractions"},
    {"subject": "remote sensing"},{"subject": "FTIR spectroscopy"},
    {"subject": "TCCON"}]

metadata['titles'] = [{'title': "TCCON data from " + site_info[0] +
        ", Release GGG2014.R0"}]

metadata['version'] = 'GGG2014.R0'

result = schema40.validate(metadata)
if not result:
    # Print every schema violation. The validator object itself yields the
    # errors; calling its validate() would raise on the first one.
    errors = sorted(schema40.validator.iter_errors(metadata),
                    key=lambda e: e.path)
    for error in errors:
        print(error.message)
    # Same exception type as exit(), but with a failing status code.
    raise SystemExit(1)

# NOTE(review): this rebinds the name ``json`` from the module to a string.
# The name is kept because code beyond this excerpt may rely on it; rename
# it once the whole script is in view.
json = json.dumps(metadata)
# The context manager flushes and closes the output file.
with open('../'+site_info[1]+'.json','w') as outfile:
    outfile.write(json)
Exemplo n.º 14
0
password = infile.readline().strip()

# Initialize the MDS client.
d = DataCiteMDSClient(
    username='******',
    password=password,
    prefix=prefix,
    #test_mode=True
)

# check_output() returns the command's text output as-is; strip it so a
# trailing newline does not become part of the DOI.
doi_end = subprocess.check_output(['./gen-cool-doi'],
                                  universal_newlines=True).strip()
identifier = str(prefix) + '/' + str(doi_end)

metadata['identifier'] = {'identifier': identifier, 'identifierType': 'DOI'}

if not schema40.validate(metadata):
    # Print every schema violation, then stop before anything is posted.
    # An explicit check is used because ``assert`` is stripped under -O.
    errors = sorted(schema40.validator.iter_errors(metadata),
                    key=lambda e: e.path)
    for error in errors:
        print(error.message)
    raise SystemExit(1)

xml = schema40.tostring(metadata)
d.metadata_post(xml)
d.doi_post(identifier, url)
print('DOI minted:' + identifier)

metadata = {}

metadata['relatedIdentifiers'] = [{
    "relatedIdentifier": url,
Exemplo n.º 15
0
    },
    'creators': [{
        'creatorName': 'Smith, John'
    }],
    'titles': [{
        'title': 'DataCite PyPI Package'
    }],
    'publisher': 'CERN',
    'publicationYear': '2015',
    'resourceType': {
        'resourceTypeGeneral': 'Dataset'
    }
}

# Validate the dictionary against the DataCite 4.0 schema and stop on
# failure. NOTE: ``assert`` statements are skipped when Python runs with -O.
assert schema40.validate(data)

# Generate DataCite XML from dictionary.
doc = schema40.tostring(data)

# Initialize the MDS client (created with test_mode=True).
d = DataCiteMDSClient(username='******',
                      password='******',
                      prefix='10.5072',
                      test_mode=True)

# Set metadata for DOI by posting the XML document generated above.
d.metadata_post(doc)

# Mint new DOI: associate the DOI with its URL.
d.doi_post('10.5072/test-doi', 'http://example.org/test-doi')
        download_records(args.ids)

    files = glob.glob("*.xml")
    for f in files:
        if "datacite" not in f:

            print(f)

            with open(f, encoding="utf8") as fd:
                eprint = xmltodict.parse(fd.read())["eprints"]["eprint"]
            print(eprint["title"])

            metadata = epxml_to_datacite(eprint)

            # Validation fails on Windows
            valid = schema40.validate(metadata)
            # Debugging if this fails
            if valid == False:
                v = schema40.validator.validate(metadata)
                errors = sorted(v.iter_errors(instance), key=lambda e: e.path)
                for error in errors:
                    print(error.message)

            xml = schema40.tostring(metadata)

            outname = f.split(".xml")[0] + "_datacite.xml"
            outfile = open(outname, "w", encoding="utf8")
            outfile.write(xml)

            outname = f.split(".xml")[0] + "_datacite.json"
            outfile = open(outname, "w", encoding="utf8")
Exemplo n.º 17
0
    def build_metadata(self, node):
        """Build the DataCite metadata for ``node`` and return it as XML.

        Besides the DOI, creators, contributors, title, publisher, current
        year, resource type and created/updated dates, the metadata gets a
        related identifier when the node has an article DOI, plus a
        description and a license entry when the node has them. Subjects
        are always set. The dictionary is asserted to be valid before it is
        rendered with ``schema40.tostring``.
        """
        identifier = {
            'identifier': self.build_doi(node),
            'identifierType': 'DOI',
        }
        creators = datacite_format_creators([node.creator])
        contributors = datacite_format_contributors(node.visible_contributors)
        titles = [{'title': node.title}]
        publication_year = str(datetime.datetime.now().year)

        # Registrations are labelled differently from every other node type.
        if node.type == 'osf.registration':
            resource_label = 'Pre-registration'
        else:
            resource_label = 'Project'

        created = {'date': node.created.isoformat(), 'dateType': 'Created'}
        updated = {'date': node.modified.isoformat(), 'dateType': 'Updated'}

        data = {
            'identifier': identifier,
            'creators': creators,
            'contributors': contributors,
            'titles': titles,
            'publisher': 'Open Science Framework',
            'publicationYear': publication_year,
            'resourceType': {
                'resourceType': resource_label,
                'resourceTypeGeneral': 'Text',
            },
            'dates': [created, updated],
        }

        # Optional sections: only present when the node provides them.
        article_doi = node.article_doi
        if article_doi:
            related = {
                'relatedIdentifier': article_doi,
                'relatedIdentifierType': 'DOI',
                'relationType': 'IsSupplementTo',
            }
            data['relatedIdentifiers'] = [related]

        if node.description:
            abstract = {
                'descriptionType': 'Abstract',
                'description': node.description,
            }
            data['descriptions'] = [abstract]

        if node.node_license:
            rights = {
                'rights': node.node_license.name,
                'rightsURI': node.node_license.url,
            }
            data['rightsList'] = [rights]

        data['subjects'] = datacite_format_subjects(node)

        # The dictionary must validate before it is turned into XML.
        assert schema40.validate(data)

        return schema40.tostring(data)