def POST(self):
    """Import the record named by the ``identifier`` request parameter.

    Request parameters (read from ``web.input()``):
        identifier: Required. Either an item identifier, or, when
            ``bulk_marc`` is ``'true'``, a locator of the form
            ``ocaid/filename:offset:length``.
        require_marc: Unless this is the string ``'false'``, an item
            without a MARC record is rejected with ``no-marc-record``.
        bulk_marc: When the string ``'true'``, the identifier is treated as
            a pointer into a bulk binary MARC file.

    Returns:
        For bulk imports, a JSON string of the ``add_book.load`` result
        extended with the location of the next record. Otherwise whatever
        ``self.error``, ``self.status_matched`` or ``self.load_book``
        return (presumably JSON strings as well -- confirm against those
        helpers).

    Raises:
        web.HTTPError: ``403 Forbidden`` when ``can_write()`` is false.
    """
    web.header('Content-Type', 'application/json')

    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        match = re_bulk_identifier.match(identifier)
        if match is None:
            # A malformed locator used to crash with AttributeError on None.
            return self.error(
                'bad-input',
                'invalid bulk MARC identifier: %s' % identifier)
        ocaid, filename, offset, _length = match.groups()

        # Pre-bind so the except handler can always expand it, even when
        # the failure happens before the next-record location is known.
        next_data = {}
        try:
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length,
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)

        # The first five leader characters are parsed as the record's own
        # length (presumably the MARC leader record-length field) and used
        # in place of the length supplied in the identifier.
        actual_length = int(rec.leader()[:5])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid, filename, offset, actual_length)

        # TODO: Look up URN prefixes to support more sources
        prefix = 'trent'
        edition['local_id'] = [
            'urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')
        ]
        result = add_book.load(edition)

        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    # Case 0 - Is the item already loaded
    key = self.find_edition(identifier)
    if key:
        return self.status_matched(key)

    # Case 1 - Is this a valid Archive.org item?
    try:
        item_json = ia.get_item_json(identifier)
        item_server = item_json['server']
        item_path = item_json['dir']
    except KeyError:
        return self.error("invalid-ia-identifier", "%s not found" % identifier)
    metadata = ia.extract_item_metadata(item_json)
    if not metadata:
        return self.error("invalid-ia-identifier")

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
        try:
            title = metadata['title']
        except KeyError:
            # Report incomplete metadata the same way the non-MARC
            # fallback below does, instead of failing the request.
            return self.error("invalid-ia-metadata")
        d = {
            "title": title,
            "openlibrary": "/books/" + metadata["openlibrary"],
        }
        d = self.populate_edition_data(d, identifier)
        return self.load_book(d)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata,
                                item_server=item_server, item_path=item_path)
    if status != 'ok':
        return self.error(status, "Prohibited Item")

    # Gio - April 2016
    # items whose metadata has no_ol_import=true are not imported
    if metadata.get("no_ol_import", '').lower() == 'true':
        return self.error("no-ol-import")

    # Case 4 - Does this item have a marc record?
    marc_record = self.get_marc_record(identifier)
    if marc_record:
        # Is the item a serial instead of a book?
        marc_leaders = marc_record.leader()
        if marc_leaders[7] == 's':
            return self.error("item-is-serial")

        # insider note: follows Archive.org's approach of
        # Item::isMARCXMLforMonograph() which excludes non-books
        if not (marc_leaders[7] == 'm' and marc_leaders[6] == 'a'):
            return self.error("item-not-book")

        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error("failed to read from MARC record %s: %s",
                         identifier, str(e))
            return self.error("invalid-marc-record")
    elif require_marc:
        return self.error("no-marc-record")
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)
def POST(self):
    """Import the record named by the ``identifier`` request parameter.

    Request parameters (read from ``web.input()``):
        identifier: Required. Either an item identifier, or, when
            ``bulk_marc`` is ``'true'``, a locator of the form
            ``ocaid/filename:offset:length``.
        require_marc: Unless this is the string ``'false'``, an item
            without a MARC record is rejected with ``no-marc-record``.
        bulk_marc: When the string ``'true'``, the identifier is treated as
            a pointer into a bulk binary MARC file.
        local_id: Optional, bulk imports only. Name of a ``/local_ids/``
            document whose ``urn_prefix`` is used to build the edition's
            ``local_id`` URNs from the record's ``001`` fields.

    Returns:
        For bulk imports, a JSON string of the ``add_book.load`` result
        extended with the location of the next record. Otherwise whatever
        ``self.error`` or ``self.load_book`` return (presumably JSON
        strings as well -- confirm against those helpers).

    Raises:
        web.HTTPError: ``403 Forbidden`` when ``can_write()`` is false.
    """
    web.header('Content-Type', 'application/json')

    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        match = re_bulk_identifier.match(identifier)
        if match is None:
            # A malformed locator used to crash with AttributeError on None.
            return self.error(
                'bad-input',
                'invalid bulk MARC identifier: %s' % identifier)
        ocaid, filename, offset, _length = match.groups()

        # Pre-bind so the except handler can always expand it, even when
        # the failure happens before the next-record location is known.
        next_data = {}
        try:
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length,
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)

        # Use the length parsed from the record's leader rather than the
        # length supplied in the identifier.
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid, filename, offset, actual_length)

        local_id = i.get('local_id')
        if local_id:
            # NOTE(review): if the lookup can come back empty for an
            # unknown local_id, the attribute access below fails -- confirm.
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            edition['local_id'] = [
                'urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')
            ]

        result = add_book.load(edition)

        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    # Case 1 - Is this a valid Archive.org item?
    try:
        item_json = ia.get_item_json(identifier)
        item_server = item_json['server']
        item_path = item_json['dir']
    except KeyError:
        return self.error("invalid-ia-identifier", "%s not found" % identifier)
    metadata = ia.extract_item_metadata(item_json)
    if not metadata:
        return self.error("invalid-ia-identifier")

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            # Same handling as the non-MARC fallback in Case 4, which
            # already treats a KeyError from get_ia_record as bad metadata.
            return self.error("invalid-ia-metadata")
        edition_data["openlibrary"] = metadata["openlibrary"]
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata,
                                item_server=item_server, item_path=item_path)
    if status != 'ok':
        return self.error(status, "Prohibited Item")

    # Case 4 - Does this item have a marc record?
    marc_record = self.get_marc_record(identifier)
    if marc_record:
        # NOTE(review): the return value is discarded. If the helper
        # signals rejection by returning an error response instead of
        # raising, non-book records are not actually rejected -- confirm.
        self.reject_non_book_marc(marc_record)

        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error("failed to read from MARC record %s: %s",
                         identifier, str(e))
            return self.error("invalid-marc-record")
    elif require_marc:
        return self.error("no-marc-record")
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)
def POST(self):
    """Import the record named by the ``identifier`` request parameter.

    Request parameters (read from ``web.input()``):
        identifier: Required. Either an item identifier, or, when
            ``bulk_marc`` is ``'true'``, a locator of the form
            ``ocaid/filename:offset:length``.
        require_marc: Passed to ``self.ia_import``; false only when the
            parameter is the string ``'false'``.
        force_import: When the string ``'true'``, the non-book check on
            bulk MARC records is skipped; also passed to ``self.ia_import``.
        bulk_marc: When the string ``'true'``, the identifier is treated as
            a pointer into a bulk binary MARC file.
        local_id: Optional, bulk imports only. Name of a ``/local_ids/``
            document supplying ``urn_prefix`` and ``id_location``
            (``field$subfield``). Supplying it also forces the import.

    Returns:
        For bulk imports, a JSON string of the ``add_book.load`` result
        extended with the location of the next record. Otherwise whatever
        ``self.ia_import`` or ``self.error`` return (presumably JSON
        strings as well -- confirm against those helpers).

    Raises:
        web.HTTPError: ``403 Forbidden`` when ``can_write()`` is false.
    """
    web.header('Content-Type', 'application/json')

    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    force_import = i.get('force_import') == 'true'
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        match = re_bulk_identifier.match(identifier)
        if match is None:
            # A malformed locator used to crash with AttributeError on None.
            return self.error(
                'bad-input', f'invalid bulk MARC identifier: {identifier}'
            )
        ocaid, filename, offset, _length = match.groups()

        # Pre-bind so the except handler can always expand it, even when
        # the failure happens before the next-record location is known.
        next_data = {}
        try:
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length,
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = f"{identifier}: {e}"
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)

        # Use the length parsed from the record's leader rather than the
        # length supplied in the identifier.
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid,
            filename,
            offset,
            actual_length,
        )

        local_id = i.get('local_id')
        if local_id:
            # NOTE(review): if the lookup can come back empty for an
            # unknown local_id, the attribute access below fails -- confirm.
            local_id_type = web.ctx.site.get('/local_ids/' + local_id)
            prefix = local_id_type.urn_prefix
            # Records carrying a local id are always imported.
            force_import = True
            id_field, id_subfield = local_id_type.id_location.split('$')

            def get_subfield(field, id_subfield):
                """Return a string field as-is, else its first subfield value.

                For non-string fields the first value of ``id_subfield``
                from ``field[1]`` is returned, or None when there is none.
                """
                if isinstance(field, str):
                    return field
                subfields = field[1].get_subfield_values(id_subfield)
                return subfields[0] if subfields else None

            # Resolve each non-empty field once, then drop empty results.
            candidate_ids = (
                get_subfield(f, id_subfield)
                for f in rec.read_fields([id_field])
                if f
            )
            _ids = [_id for _id in candidate_ids if _id]
            edition['local_id'] = [f'urn:{prefix}:{_id}' for _id in _ids]

        # Don't add the book if the MARC record is a non-monograph item,
        # unless it is a scanning partner record and/or force_import is set.
        if not force_import:
            try:
                raise_non_book_marc(rec, **next_data)
            except BookImportError as e:
                return self.error(e.error_code, e.error, **e.kwargs)

        result = add_book.load(edition)

        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    try:
        return self.ia_import(
            identifier, require_marc=require_marc, force_import=force_import
        )
    except BookImportError as e:
        return self.error(e.error_code, e.error, **e.kwargs)
def POST(self):
    """Import the record named by the ``identifier`` request parameter.

    Request parameters (read from ``web.input()``):
        identifier: Required. Either an item identifier, or, when
            ``bulk_marc`` is ``'true'``, a locator of the form
            ``ocaid/filename:offset:length``.
        require_marc: Unless this is the string ``'false'``, an item
            without a MARC record is rejected with ``no-marc-record``.
        bulk_marc: When the string ``'true'``, the identifier is treated as
            a pointer into a bulk binary MARC file.

    Returns:
        For bulk imports, a JSON string of the ``add_book.load`` result
        extended with the location of the next record. Otherwise whatever
        ``self.error`` or ``self.load_book`` return (presumably JSON
        strings as well -- confirm against those helpers).

    Raises:
        web.HTTPError: ``403 Forbidden`` when ``can_write()`` is false.
    """
    web.header('Content-Type', 'application/json')

    if not can_write():
        raise web.HTTPError('403 Forbidden')

    i = web.input()

    require_marc = not (i.get('require_marc') == 'false')
    bulk_marc = i.get('bulk_marc') == 'true'

    if 'identifier' not in i:
        return self.error('bad-input', 'identifier not provided')
    identifier = i.identifier

    # First check whether this is a non-book, bulk-marc item
    if bulk_marc:
        # Get binary MARC by identifier = ocaid/filename:offset:length
        re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
        match = re_bulk_identifier.match(identifier)
        if match is None:
            # A malformed locator used to crash with AttributeError on None.
            return self.error(
                'bad-input',
                'invalid bulk MARC identifier: %s' % identifier)
        ocaid, filename, offset, _length = match.groups()

        # Pre-bind so the except handler can always expand it, even when
        # the failure happens before the next-record location is known.
        next_data = {}
        try:
            data, next_offset, next_length = get_from_archive_bulk(identifier)
            next_data = {
                'next_record_offset': next_offset,
                'next_record_length': next_length,
            }
            rec = MarcBinary(data)
            edition = read_edition(rec)
        except MarcException as e:
            details = "%s: %s" % (identifier, str(e))
            logger.error("failed to read from bulk MARC record %s", details)
            return self.error('invalid-marc-record', details, **next_data)

        # Use the length parsed from the record's leader rather than the
        # length supplied in the identifier.
        actual_length = int(rec.leader()[:MARC_LENGTH_POS])
        edition['source_records'] = 'marc:%s/%s:%s:%d' % (
            ocaid, filename, offset, actual_length)

        # TODO: Look up URN prefixes to support more sources, extend
        # openlibrary/catalog/marc/sources?
        if ocaid == 'OpenLibraries-Trent-MARCs':
            prefix = 'trent'
            edition['local_id'] = [
                'urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')
            ]

        result = add_book.load(edition)

        # Add next_data to the response as location of next record:
        result.update(next_data)
        return json.dumps(result)

    # Case 1 - Is this a valid Archive.org item?
    try:
        item_json = ia.get_item_json(identifier)
        item_server = item_json['server']
        item_path = item_json['dir']
    except KeyError:
        return self.error("invalid-ia-identifier", "%s not found" % identifier)
    metadata = ia.extract_item_metadata(item_json)
    if not metadata:
        return self.error("invalid-ia-identifier")

    # Case 2 - Does the item have an openlibrary field specified?
    # The scan operators search OL before loading the book and add the
    # OL key if a match is found. We can trust them and attach the item
    # to that edition.
    if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            # Same handling as the non-MARC fallback in Case 4, which
            # already treats a KeyError from get_ia_record as bad metadata.
            return self.error("invalid-ia-metadata")
        edition_data["openlibrary"] = metadata["openlibrary"]
        edition_data = self.populate_edition_data(edition_data, identifier)
        return self.load_book(edition_data)

    # Case 3 - Can the item be loaded into Open Library?
    status = ia.get_item_status(identifier, metadata,
                                item_server=item_server, item_path=item_path)
    if status != 'ok':
        return self.error(status, "Prohibited Item")

    # Case 4 - Does this item have a marc record?
    marc_record = self.get_marc_record(identifier)
    if marc_record:
        # NOTE(review): the return value is discarded. If the helper
        # signals rejection by returning an error response instead of
        # raising, non-book records are not actually rejected -- confirm.
        self.reject_non_book_marc(marc_record)

        try:
            edition_data = read_edition(marc_record)
        except MarcException as e:
            logger.error("failed to read from MARC record %s: %s",
                         identifier, str(e))
            return self.error("invalid-marc-record")
    elif require_marc:
        return self.error("no-marc-record")
    else:
        try:
            edition_data = self.get_ia_record(metadata)
        except KeyError:
            return self.error("invalid-ia-metadata")

    # Add IA specific fields: ocaid, source_records, and cover
    edition_data = self.populate_edition_data(edition_data, identifier)
    return self.load_book(edition_data)