def listrecords(endpoint, oaiset=None, resumption_token=None, metadataPrefix="oai_dc", limit=1000):
    """
    Fetch records from an OAI endpoint and return them as a JSON document.

    e.g.:

    curl "http://localhost:8880/oai.listrecords.json?oaiset=hdl_1721.1_18193&limit=10"

    endpoint - handed to oaiservice() to select the remote service
    oaiset - set identifier forwarded to list_records
    resumption_token - token forwarded to list_records
    metadataPrefix - metadata format forwarded to list_records
    limit - maximum number of records kept from the result; coerced with int()

    Returns a JSON string with the keys 'items', 'data_profile' and
    'resumption_token' ('' when the service result carries no token).
    """
    limit = int(limit)
    remote = oaiservice(endpoint, logger)
    # NOTE(review): other callers in this file pass set_id= to list_records;
    # confirm which keyword this service version expects.
    list_records_result = remote.list_records(set=oaiset, resumption_token=resumption_token, metadataPrefix=metadataPrefix)
    records = list_records_result['records'][:limit]
    resumption_token = list_records_result.get('resumption_token', '')

    exhibit_records = []
    properties_used = set()  # track the properties in use
    for rid, rinfo in records:
        erecord = {u'id': rid}
        for k, v in rinfo.iteritems():
            # Test the type first: len() raises TypeError on values that have
            # no length, so it must only be evaluated for lists.
            if isinstance(v, list) and len(v) == 1:
                erecord[k] = v[0]
            else:
                erecord[k] = v
        if u'title' in erecord:
            erecord[u'label'] = erecord[u'title']
        properties_used.update(erecord.keys())
        exhibit_records.append(erecord)

    # Slice assignment rewrites the existing PROFILE["properties"] list in
    # place, so the change is visible to every holder of PROFILE.
    PROFILE["properties"][:] = strip_unused_profile_properties(PROFILE["properties"], properties_used)
    #FIXME: This profile is NOT correct. Dumb copy from CDM endpoint. Please fix up below

    return json.dumps({'items': exhibit_records, 'data_profile': PROFILE, 'resumption_token': resumption_token}, indent=4)
def test_oai_dc_field_conversion():
    """
    A record listed with metadataPrefix oai_dc carries exactly the expected field names
    """
    service = oaiservice("http://digitallibrary.usc.edu/oai/oai.php", logger)
    listing = service.list_records(set_id="p15799coll46", metadataPrefix="oai_dc")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record); compare the record's keys in sorted order
    found = sorted(sample[1].keys())
    wanted = [
        "contributor",
        "coverage",
        "creator",
        "date",
        "datestamp",
        "handle",
        "publisher",
        "relation",
        "rights",
        "setSpec",
        "source",
        "status",
        "title",
        "type",
    ]
    assert found == wanted
def test_oai_qdc_field_conversion():
    """
    A record listed with metadataPrefix qdc carries exactly the expected field names
    """
    service = oaiservice("http://repository.clemson.edu/cgi-bin/oai.exe", logger)
    listing = service.list_records(set_id="mbe", metadataPrefix="qdc")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record); compare the record's keys in sorted order
    found = sorted(sample[1].keys())
    wanted = [
        "contributor",
        "date",
        "datestamp",
        "description",
        "format",
        "handle",
        "language",
        "medium",
        "publisher",
        "relation",
        "rights",
        "setSpec",
        "source",
        "spatial",
        "status",
        "subject",
        "title",
        "type",
    ]
    failure_text = "\n%s\ndoes not match expected:\n%s\n" % (found, wanted)
    assert found == wanted, failure_text
def getrecord(endpoint, id):
    """
    Fetch one record from an OAI endpoint and return it as a JSON document.

    e.g.:

    curl "http://localhost:8880/dpla-get-record?endpoint=URL&id=IDENTIFIER"

    endpoint - handed to oaiservice() to select the remote service
    id - record identifier forwarded to get_record

    Returns a JSON string with the keys 'items' and 'data_profile'.
    """
    remote = oaiservice(endpoint, logger)
    get_record_result = remote.get_record(id=id)
    record = get_record_result['record']

    exhibit_record = []
    properties_used = set()  # track the properties in use
    for rid, rinfo in record:
        erecord = {u'id': rid}
        for k, v in rinfo.iteritems():
            # Only single-element lists are unwrapped. Without the type check
            # a one-key dict would be indexed with 0 (KeyError) and a value
            # without a length would make len() raise TypeError.
            if isinstance(v, list) and len(v) == 1:
                erecord[k] = v[0]
            else:
                erecord[k] = v
        if u'title' in erecord:
            erecord[u'label'] = erecord[u'title']
        properties_used.update(erecord.keys())
        exhibit_record.append(erecord)

    # Slice assignment rewrites the existing PROFILE["properties"] list in
    # place, so the change is visible to every holder of PROFILE.
    PROFILE["properties"][:] = strip_unused_profile_properties(PROFILE["properties"], properties_used)
    #FIXME: This profile is NOT correct. Dumb copy from CDM endpoint. Please fix up below

    return json.dumps({'items': exhibit_record, 'data_profile': PROFILE}, indent=4)
def getrecord(endpoint, id):
    """
    Fetch one record from an OAI endpoint and return it as a JSON document.

    e.g.:

    curl "http://localhost:8880/dpla-get-record?endpoint=URL&id=IDENTIFIER"

    endpoint - handed to oaiservice() to select the remote service
    id - record identifier forwarded to get_record

    Returns a JSON string with the keys 'items' and 'data_profile'.
    """
    remote = oaiservice(endpoint, logger)
    get_record_result = remote.get_record(id=id)
    record = get_record_result['record']

    exhibit_record = []
    properties_used = set()  # track the properties in use
    for rid, rinfo in record:
        erecord = {u'id': rid}
        for k, v in rinfo.iteritems():
            # Only single-element lists are unwrapped. Without the type check
            # a one-key dict would be indexed with 0 (KeyError) and a value
            # without a length would make len() raise TypeError.
            if isinstance(v, list) and len(v) == 1:
                erecord[k] = v[0]
            else:
                erecord[k] = v
        if u'title' in erecord:
            erecord[u'label'] = erecord[u'title']
        properties_used.update(erecord.keys())
        exhibit_record.append(erecord)

    # Slice assignment rewrites the existing PROFILE["properties"] list in
    # place, so the change is visible to every holder of PROFILE.
    PROFILE["properties"][:] = strip_unused_profile_properties(
        PROFILE["properties"], properties_used)
    #FIXME: This profile is NOT correct. Dumb copy from CDM endpoint. Please fix up below

    return json.dumps({
        'items': exhibit_record,
        'data_profile': PROFILE
    }, indent=4)
def test_untl_field_conversion():
    """
    A record listed with metadataPrefix untl has the expected top-level and untl:metadata field names
    """
    service = oaiservice("http://texashistory.unt.edu/oai/", logger)
    listing = service.list_records(set_id="partner:RGPL", metadataPrefix="untl")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record)
    found_record_fields = sorted(sample[1].keys())
    untl_metadata = sample[1]["metadata"]["untl:metadata"]
    found_untl_fields = sorted(untl_metadata.keys())
    wanted_record_fields = ["header", "metadata"]
    wanted_untl_fields = [
        "untl:collection",
        "untl:coverage",
        "untl:creator",
        "untl:date",
        "untl:description",
        "untl:format",
        "untl:identifier",
        "untl:institution",
        "untl:language",
        "untl:meta",
        "untl:note",
        "untl:primarySource",
        "untl:publisher",
        "untl:resourceType",
        "untl:rights",
        "untl:subject",
        "untl:title",
        "xmlns:untl",
    ]
    assert found_record_fields == wanted_record_fields
    assert found_untl_fields == wanted_untl_fields
def listsets(endpoint, limit=None):
    """
    Return the sets reported by an OAI endpoint as a JSON string.

    e.g.:

    curl "http://localhost:8880/oai.listsets.json?limit=10"

    When limit is truthy, only the first int(limit) sets are kept.
    """
    service = oaiservice(endpoint, logger)
    available_sets = service.list_sets()
    if limit:
        available_sets = available_sets[:int(limit)]
    return json.dumps(available_sets, indent=4)
def test_oai_dc_field_conversion():
    """
    A record listed with metadataPrefix oai_dc carries exactly the expected field names
    """
    service = oaiservice("http://digitallibrary.usc.edu/oai/oai.php", logger)
    listing = service.list_records(set_id="p15799coll46", metadataPrefix="oai_dc")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record); compare the record's keys in sorted order
    found = sorted(sample[1].keys())
    wanted = [
        'contributor', 'coverage', 'creator', 'date', 'datestamp',
        'handle', 'publisher', 'relation', 'rights', 'setSpec',
        'source', 'status', 'title', 'type',
    ]
    assert found == wanted
def listrecords(endpoint, oaiset=None, resumption_token=None, metadataPrefix="oai_dc", limit=1000):
    """
    Fetch records from an OAI endpoint and return them as a JSON document.

    e.g.:

    curl "http://localhost:8880/oai.listrecords.json?oaiset=hdl_1721.1_18193&limit=10"

    endpoint - handed to oaiservice() to select the remote service
    oaiset - set identifier forwarded to list_records
    resumption_token - token forwarded to list_records
    metadataPrefix - metadata format forwarded to list_records
    limit - maximum number of records kept from the result; coerced with int()

    Returns a JSON string with the keys 'items', 'data_profile' and
    'resumption_token' ('' when the service result carries no token).
    """
    limit = int(limit)
    remote = oaiservice(endpoint, logger)
    # NOTE(review): other callers in this file pass set_id= to list_records;
    # confirm which keyword this service version expects.
    list_records_result = remote.list_records(
        set=oaiset,
        resumption_token=resumption_token,
        metadataPrefix=metadataPrefix)
    records = list_records_result['records'][:limit]
    resumption_token = list_records_result.get('resumption_token', '')

    exhibit_records = []
    properties_used = set()  # track the properties in use
    for rid, rinfo in records:
        erecord = {u'id': rid}
        for k, v in rinfo.iteritems():
            # Test the type first: len() raises TypeError on values that have
            # no length, so it must only be evaluated for lists.
            if isinstance(v, list) and len(v) == 1:
                erecord[k] = v[0]
            else:
                erecord[k] = v
        if u'title' in erecord:
            erecord[u'label'] = erecord[u'title']
        properties_used.update(erecord.keys())
        exhibit_records.append(erecord)

    # Slice assignment rewrites the existing PROFILE["properties"] list in
    # place, so the change is visible to every holder of PROFILE.
    PROFILE["properties"][:] = strip_unused_profile_properties(
        PROFILE["properties"], properties_used)
    #FIXME: This profile is NOT correct. Dumb copy from CDM endpoint. Please fix up below

    return json.dumps(
        {
            'items': exhibit_records,
            'data_profile': PROFILE,
            'resumption_token': resumption_token
        },
        indent=4)
def test_oai_qdc_field_conversion():
    """
    A record listed with metadataPrefix qdc carries exactly the expected field names
    """
    service = oaiservice("http://repository.clemson.edu/cgi-bin/oai.exe", logger)
    listing = service.list_records(set_id="mbe", metadataPrefix="qdc")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record); compare the record's keys in sorted order
    found = sorted(sample[1].keys())
    wanted = [
        'contributor', 'date', 'datestamp', 'description', 'format',
        'handle', 'language', 'medium', 'publisher', 'relation',
        'rights', 'setSpec', 'source', 'spatial', 'status',
        'subject', 'title', 'type',
    ]
    failure_text = "\n%s\ndoes not match expected:\n%s\n" % (found, wanted)
    assert found == wanted, failure_text
def test_mods_field_conversion():
    """
    A record listed with metadataPrefix mods has at least the expected top-level fields

    The individual MODS metadata fields are not asserted: they vary between
    providers, and there is no configuration that specifies the valid
    fields per provider.
    """
    service = oaiservice("http://vcoai.lib.harvard.edu/vcoai/vc", logger)
    listing = service.list_records(set_id="manuscripts", metadataPrefix="mods")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record)
    found_record_fields = sorted(sample[1].keys())
    # Not compared against anything, but the lookup itself fails if the
    # record has no metadata/mods entry.
    found_mods_fields = sorted(sample[1]['metadata']['mods'].keys())
    minimum_record_fields = ['header', 'metadata']
    for field_name in minimum_record_fields:
        assert field_name in found_record_fields
def test_mods_field_conversion():
    """
    A record listed with metadataPrefix mods has at least the expected top-level fields

    The individual MODS metadata fields are not asserted: they vary between
    providers, and there is no configuration that specifies the valid
    fields per provider.
    """
    service = oaiservice("http://vcoai.lib.harvard.edu/vcoai/vc", logger)
    listing = service.list_records(set_id="manuscripts", metadataPrefix="mods")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record)
    found_record_fields = sorted(sample[1].keys())
    # Not compared against anything, but the lookup itself fails if the
    # record has no metadata/mods entry.
    found_mods_fields = sorted(sample[1]["metadata"]["mods"].keys())
    minimum_record_fields = ["header", "metadata"]
    for field_name in minimum_record_fields:
        assert field_name in found_record_fields
def test_marc_field_conversion():
    """
    A record listed with metadataPrefix marc has the expected top-level and MARC record field names
    """
    # uiuc_book profile
    provider_url = "http://ratri.grainger.illinois.edu/oca-oaiprovider/oai.asp"
    service = oaiservice(provider_url, logger)
    listing = service.list_records(set_id="UC", metadataPrefix="marc")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record)
    found_record_fields = sorted(sample[1].keys())
    marc_record = sample[1]['metadata']['record']
    found_marc_fields = sorted(marc_record.keys())
    wanted_record_fields = ['header', 'metadata']
    wanted_marc_fields = [
        'controlfield',
        'datafield',
        'leader',
        'xmlns',
        'xmlns:xsi',
        'xsi:schemaLocation',
    ]
    assert found_record_fields == wanted_record_fields
    assert found_marc_fields == wanted_marc_fields
def test_marc_field_conversion():
    """
    A record listed with metadataPrefix marc has the expected top-level and MARC record field names
    """
    # uiuc_book profile
    provider_url = "http://ratri.grainger.illinois.edu/oca-oaiprovider/oai.asp"
    service = oaiservice(provider_url, logger)
    listing = service.list_records(set_id="UC", metadataPrefix="marc")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record)
    found_record_fields = sorted(sample[1].keys())
    marc_record = sample[1]["metadata"]["record"]
    found_marc_fields = sorted(marc_record.keys())
    wanted_record_fields = ["header", "metadata"]
    wanted_marc_fields = [
        "controlfield",
        "datafield",
        "leader",
        "xmlns",
        "xmlns:xsi",
        "xsi:schemaLocation",
    ]
    assert found_record_fields == wanted_record_fields
    assert found_marc_fields == wanted_marc_fields
def test_untl_field_conversion():
    """
    A record listed with metadataPrefix untl has the expected top-level and untl:metadata field names
    """
    service = oaiservice("http://texashistory.unt.edu/oai/", logger)
    listing = service.list_records(set_id="partner:RGPL", metadataPrefix="untl")
    sample = first_non_collection_record(listing["records"])
    # sample is (id, record)
    found_record_fields = sorted(sample[1].keys())
    untl_metadata = sample[1]['metadata']['untl:metadata']
    found_untl_fields = sorted(untl_metadata.keys())
    wanted_record_fields = ['header', 'metadata']
    wanted_untl_fields = [
        'untl:collection', 'untl:coverage', 'untl:creator', 'untl:date',
        'untl:description', 'untl:format', 'untl:identifier',
        'untl:institution', 'untl:language', 'untl:meta', 'untl:note',
        'untl:primarySource', 'untl:publisher', 'untl:resourceType',
        'untl:rights', 'untl:subject', 'untl:title', 'xmlns:untl',
    ]
    assert found_record_fields == wanted_record_fields
    assert found_untl_fields == wanted_untl_fields
def listrecords(endpoint, oaiset=None, resumption_token=None, metadataPrefix="oai_dc",
                frm=None, until=None, limit=1000):
    """
    Fetch records from an OAI endpoint and return them as a JSON document.

    e.g.:

    curl "http://localhost:8880/oai.listrecords.json?oaiset=hdl_1721.1_18193&limit=10"

    When frm is given without until, until is set to the current date
    formatted as YYYY-MM-DD. At most int(limit) records are kept.

    On success returns a JSON string with the keys 'items', 'data_profile'
    and 'resumption_token'. When OAIError, OAIHTTPError or OAIParseError is
    raised, the error text is logged, response.code is set to 500 and the
    text is returned instead.
    """
    limit = int(limit)
    if until is None and frm is not None:
        until = datetime.now().strftime("%Y-%m-%d")
    service = oaiservice(endpoint, logger)

    try:
        result = service.list_records(
            set_id=oaiset,
            resumption_token=resumption_token,
            metadataPrefix=metadataPrefix,
            frm=frm,
            until=until)
        page = result['records'][:limit]
        resumption_token = result.get('resumption_token', '')

        items = []
        seen_properties = set()  # property names present in any item
        for record_id, fields in page:
            item = {u'id': record_id}
            for name, value in fields.iteritems():
                # single-element lists are stored as their only element
                unwrap = isinstance(value, list) and len(value) == 1
                item[name] = value[0] if unwrap else value
            if u'title' in item:
                item[u'label'] = item[u'title']
            seen_properties.update(item.keys())
            items.append(item)

        kept_properties = strip_unused_profile_properties(PROFILE["properties"],
                                                          seen_properties)
        PROFILE["properties"][:] = kept_properties

        payload = {
            'items': items,
            'data_profile': PROFILE,
            'resumption_token': resumption_token
        }
        return json.dumps(payload, indent=4)
    except OAIError as e:
        msg = "Error message from OAI response for set %s: %s" % (oaiset, e.message)
    except OAIHTTPError as e:
        msg = "HTTP error fetching set %s: %s" % (oaiset, e.message)
    except OAIParseError as e:
        msg = "Document structure error with set %s: %s" % (oaiset, e.message)

    # Shared tail for the three handled error types; the success path has
    # already returned from inside the try block.
    logger.error(msg)
    response.code = 500
    return msg
def listrecords(endpoint, oaiset=None, resumption_token=None, metadataPrefix="oai_dc",
                frm=None, until=None, limit=1000):
    """
    Fetch records from an OAI endpoint and return them as a JSON document.

    e.g.:

    curl "http://localhost:8880/oai.listrecords.json?oaiset=hdl_1721.1_18193&limit=10"

    When frm is given without until, until is set to the current date
    formatted as YYYY-MM-DD. At most int(limit) records are kept.

    On success returns a JSON string with the keys 'items', 'data_profile'
    and 'resumption_token'. When OAIError, OAIHTTPError or OAIParseError is
    raised, the error text is logged, response.code is set to 500 and the
    text is returned instead.
    """
    limit = int(limit)
    if until is None and frm is not None:
        until = datetime.now().strftime("%Y-%m-%d")
    service = oaiservice(endpoint, logger)

    try:
        result = service.list_records(set_id=oaiset,
                                      resumption_token=resumption_token,
                                      metadataPrefix=metadataPrefix,
                                      frm=frm,
                                      until=until)
        page = result['records'][:limit]
        resumption_token = result.get('resumption_token', '')

        items = []
        seen_properties = set()  # property names present in any item
        for record_id, fields in page:
            item = {u'id': record_id}
            for name, value in fields.iteritems():
                # single-element lists are stored as their only element
                if not isinstance(value, list) or len(value) != 1:
                    item[name] = value
                else:
                    item[name] = value[0]
            if u'title' in item:
                item[u'label'] = item[u'title']
            seen_properties.update(item.keys())
            items.append(item)

        kept_properties = strip_unused_profile_properties(PROFILE["properties"],
                                                          seen_properties)
        PROFILE["properties"][:] = kept_properties

        return json.dumps(
            {
                'items': items,
                'data_profile': PROFILE,
                'resumption_token': resumption_token
            },
            indent=4)
    except OAIError as e:
        msg = "Error message from OAI response for set %s: %s" % (oaiset, e.message)
    except OAIHTTPError as e:
        msg = "HTTP error fetching set %s: %s" % (oaiset, e.message)
    except OAIParseError as e:
        msg = "Document structure error with set %s: %s" % (oaiset, e.message)

    # Shared tail for the three handled error types; the success path has
    # already returned from inside the try block.
    logger.error(msg)
    response.code = 500
    return msg