def add_book(data):
    """Create a new edition on openlibrary.org from a flat metadata mapping.

    ``data`` is indexed with the keys 'authors' (a single string of names
    separated by ', '), 'title', 'publisher', 'year', 'pages', 'isbn',
    'cover' and 'categories'. Returns the object produced by
    ``OpenLibrary.create_book`` after the cover and subject have been added
    and ``save()`` has been called on it.
    """
    # One Author per name in the comma separated author string
    authors = []
    for name in data['authors'].split(', '):
        authors.append(common.Author(name=name))

    book = common.Book(
        title=data['title'],
        authors=authors,
        publisher=data['publisher'],
        publish_date=data['year'],
        pages=data['pages'],
    )

    # Register the ISBN under the identifier name matching its format;
    # a value that is neither ISBN-10 nor ISBN-13 is not attached at all
    isbn = data['isbn']
    id_name = None
    if is_isbn10(isbn):
        id_name = 'isbn_10'
    elif is_isbn13(isbn):
        id_name = 'isbn_13'
    if id_name is not None:
        book.add_id(id_name, isbn)

    # Create the record, then decorate and persist it
    created = OpenLibrary().create_book(book)
    created.add_bookcover(data['cover'])
    created.work.add_subject(data['categories'])
    created.save()
    return created
def __init__(self, ol=None, dry_run=True, limit=1):
    """Create logger and class variables

    :param ol: client to use; a fresh ``OpenLibrary()`` is built when None
    :param dry_run: stored unchanged on ``self.dry_run``
    :param limit: stored unchanged on ``self.limit``

    The job name is ``sys.argv[0]`` with '.py' removed. The logger
    'jobs.<job name>' gets a console handler at WARN level and a file
    handler at DEBUG level writing to
    'logs/jobs/<job name>/<job name>_<timestamp>.log'.
    """
    self.ol = OpenLibrary() if ol is None else ol
    self.changed = 0
    self.dry_run = dry_run
    self.limit = limit

    job_name = sys.argv[0].replace('.py', '')
    formatter = logging.Formatter(
        '%(name)s;%(levelname)-8s;%(asctime)s %(message)s')

    self.logger = logging.getLogger("jobs.%s" % job_name)
    self.logger.setLevel(logging.DEBUG)

    # Console output: warnings and above only
    console = logging.StreamHandler()
    console.setLevel(logging.WARN)
    console.setFormatter(formatter)
    self.console_handler = console
    self.logger.addHandler(console)

    # File output: everything, one timestamped file per run
    log_dir = 'logs/jobs/%s' % job_name
    makedirs(log_dir, exist_ok=True)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    to_file = logging.FileHandler(
        log_dir + '/%s_%s.log' % (job_name, timestamp))
    to_file.setLevel(logging.DEBUG)
    to_file.setFormatter(formatter)
    self.logger.addHandler(to_file)
def __init__(self, ol=None, dry_run=True, limit=1, job_name=__name__) -> None:
    """Create logger and class variables

    :param ol: client to use; a new ``OpenLibrary()`` is created when falsy
    :param dry_run: value used for ``self.dry_run`` unless --dry-run is
        given on the command line
    :param limit: value used for ``self.limit`` unless --limit is given on
        the command line
    :param job_name: passed to ``self.setup_logger``

    The command line is parsed immediately (``parse_args()`` reads
    ``sys.argv``); the parsed namespace is kept on ``self.args``.
    """
    self.ol = ol or OpenLibrary()

    self.parser = argparse.ArgumentParser(description=__doc__)
    self.parser.add_argument(
        '-f', '--file', type=str, default=None,
        help='Path to file containing input data',
    )
    # '-l' and '--limit' must be two separate flag strings. The constructor
    # value is the argparse default so that an explicit '--limit 0' from the
    # command line is kept instead of being discarded as falsy.
    self.parser.add_argument(
        '-l', '--limit', type=int, default=limit,
        help='Limit number of edits performed on external data. '
             'Set to zero to allow unlimited edits.',
    )
    self.parser.add_argument(
        '-d', '--dry-run', type=self._str2bool, default=dry_run,
        help='Execute the script without performing edits on external data.',
    )
    self.args = self.parser.parse_args()

    # argparse stores '--dry-run' under the attribute name 'dry_run'
    self.dry_run = self.args.dry_run
    self.limit = self.args.limit
    self.changed = 0
    self.logger, self.console_handler = self.setup_logger(job_name)
def setUp(self, mock_login):
    """Build the client and the plain-string / typed-text fixtures.

    ``self.strings`` holds 'description' and 'notes' as bare strings;
    ``self.texts`` holds the same two keys as {'type': '/type/text',
    'value': ...} dicts.
    """
    def as_text(value):
        # Wrap a string in the '/type/text' dict form
        return {'type': '/type/text', 'value': value}

    self.ol = OpenLibrary()
    self.strings = {
        'description': 'A String Description',
        'notes': 'A String Note',
    }
    self.texts = {
        'description': as_text('A Text Description'),
        'notes': as_text('A Text Note'),
    }
def __init__(self, ol=None, write_changes=False, limit=1, job_name=__name__) -> None:
    """Create logger and class variables

    :param ol: client to use; a new ``OpenLibrary()`` is created when falsy
    :param write_changes: when True, ``self.write_changes`` is True even if
        --write-changes is not given on the command line
    :param limit: value used for ``self.limit`` unless --limit is given on
        the command line
    :param job_name: passed to ``self.setup_logger``

    The command line is parsed immediately (``parse_args()`` reads
    ``sys.argv``); the parsed namespace is kept on ``self.args``.
    """
    self.ol = ol or OpenLibrary()

    self.parser = argparse.ArgumentParser(description=__doc__)
    self.parser.add_argument(
        '-f',
        '--file',
        type=str,
        default=None,
        help='Path to file containing input data',
    )
    # The constructor value is used as the argparse default. Previously the
    # default was a hard-coded 1 combined with ``args.limit or limit``, which
    # ignored the constructor argument and turned an explicit '--limit 0'
    # (documented as "unlimited") back into the fallback value.
    self.parser.add_argument(
        '-l',
        '--limit',
        type=int,
        default=limit,
        help='Limit number of edits performed on external data. '
        'Set to zero to allow unlimited edits.',
    )
    self.parser.add_argument(
        '-w',
        '--write-changes',
        action="store_true",
        help='Execute the script and write all changes to external data.',
    )
    self.args = self.parser.parse_args()

    self.write_changes = write_changes or self.args.write_changes
    self.limit = self.args.limit
    self.changed = 0
    self.logger, self.console_handler = self.setup_logger(job_name)
def get_edition_from_work(olid):
    """Look up one Edition, or every Edition of a Work, and merge the results.

    An olid ending in "M" is looked up directly. An olid ending in "W" is
    fetched as a Work and each of its editions is looked up. Every lookup
    goes through ``query(olid, bibkey="OLID")`` and the returned mappings
    are merged into a single dict (later results overwrite earlier keys).

    Raises KeyError when the olid ends in neither "M" nor "W".
    """
    if not olid.endswith(("M", "W")):
        raise KeyError("missing/wrong Work olid. Should end with 'M' or 'W'")

    if olid.endswith("M"):
        edition_olids = [olid]
    else:
        # Fetch the work, then read the olid out of each edition's attributes
        work = OpenLibrary().Work.get(olid)
        edition_olids = [vars(edition)["olid"] for edition in work.editions]

    # For simplicity each Edition olid is looked up with a request instead of
    # parsing the OL class
    merged = {}
    for edition_olid in edition_olids:
        merged.update(query(edition_olid, bibkey="OLID"))
    return merged
def main():
    """Search Google Books for --query and upload one result to Open Library.

    Command line arguments (both required): --query and --google_api_key.
    If ``_isbn_matches`` accepts the first result for the query, that book is
    uploaded immediately and the upload result is returned. Otherwise the
    considered results are printed and the user is prompted for an index.

    Raises ValueError when Google Books reports no results, when the reply
    to the prompt is not an integer, or when the chosen index does not refer
    to one of the listed books.
    """
    global OL
    OL = OpenLibrary()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--query",
        help="Your Google Books query (title, author, ISBN, etc)",
        required=True)
    parser.add_argument("--google_api_key",
                        help="Your Google API key",
                        required=True)
    args = parser.parse_args()

    google_books_service = build('books', 'v1',
                                 developerKey=args.google_api_key)
    google_books_request = google_books_service.volumes().list(
        source='public', q=args.query)
    google_books_response = google_books_request.execute()

    number_of_google_books = google_books_response["totalItems"]
    number_of_considered_books = min(number_of_google_books,
                                     NUMBER_OF_BOOK_CHOICES)
    if number_of_google_books == 0:
        raise ValueError(
            "The Google Books API returned no results for your query. Aborting."
        )

    google_books = google_books_response["items"]
    ol_books = _ol_books_from_google_books(google_books,
                                           number_of_considered_books)

    # If query is an ISBN and Google finds a match, go ahead and upload
    if _isbn_matches(ol_books[0], args.query):
        ol_book = _ol_book_from_google_book(google_books[0])
        return _upload_ol_book(ol_book)

    # Else, let the user choose from a list
    print(
        "Google Books found {} results for this query. Here are the first {}:".
        format(number_of_google_books, number_of_considered_books))
    for i, ol_book in enumerate(ol_books):
        # NOTE(review): assumes every listed book carries an 'isbn_10'
        # identifier - confirm against _ol_books_from_google_books
        isbn_10 = ol_book.identifiers["isbn_10"][0]
        print("\t{}: '{}' by {} - ISBN {}".format(i, ol_book.title,
                                                  ol_book.primary_author.name,
                                                  isbn_10))

    chosen_index = int(input("Which of these would you like to upload? "))
    # Valid choices are exactly the printed indices 0 .. len(ol_books) - 1.
    # The previous '>' comparison let an index equal to the count through
    # (IndexError) and silently accepted negative indices.
    if not 0 <= chosen_index < len(ol_books):
        raise ValueError("Invalid choice. Aborting.")
    _upload_ol_book(ol_books[chosen_index])
def main():
    """Build a book from --title / --author and search Open Library for it.

    --author is split on "; " into a list of author strings (it defaults to
    the empty string). Returns a tuple of the created book and the result of
    ``search_book`` for that book.
    """
    global OL
    OL = OpenLibrary()

    cli = argparse.ArgumentParser()
    cli.add_argument("--title", required=True)
    cli.add_argument("--author", default="", required=False)
    options = cli.parse_args()

    book = create_book({
        "title": options.title,
        "authors": options.author.split("; "),
    })
    found = search_book(book)
    return book, found
def main():
    """Example: create the book "Warlight: A novel" and attach a cover to it."""
    # Open Library client
    client = OpenLibrary()

    # Describe the book that is going to be created
    author = common.Author(name="Michael Ondaatje")
    draft = common.Book(
        title="Warlight: A novel",
        authors=[author],
        publisher="Deckle Edge",
        publish_date="2018",
    )

    # Identifiers: ISBN 10 first, then ISBN 13
    for id_name, id_value in (
        ('isbn_10', '0525521194'),
        ('isbn_13', '978-0525521198'),
    ):
        draft.add_id(id_name, id_value)

    # Create the book, then add a cover image to the created record
    created = client.create_book(draft)
    created.add_bookcover(
        'https://images-na.ssl-images-amazon.com/images/I/51kmM%2BvVRJL._SX337_BO1,204,203,200_.jpg'
    )
class TestAuthors(unittest.TestCase):
    """Tests for Author objects created through an OpenLibrary client."""

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        # OpenLibrary.login is replaced by a mock while the client is built
        client = OpenLibrary()
        self.ol = client

    def test_author_validation(self):
        """validate() returns None for an author built with these fields."""
        modified = {
            'type': '/type/datetime',
            'value': '2016-10-12T00:48:04.453554',
        }
        author = self.ol.Author(
            'OL123A',
            name='Test Author',
            revision=1,
            last_modified=modified,
        )
        outcome = author.validate()
        self.assertIsNone(outcome)
def search_book(ol_book):
    """Search for a book using title and author.

    Runs ``OpenLibrary().Work.search`` with the book's title and primary
    author name and logs the result at debug level.

    Returns a tuple ``(olid, work)``. When the identifiers of the search
    result cannot be extracted (for whatever reason), the tuple is
    ``("", {})``.
    """
    OL = OpenLibrary()
    work = OL.Work.search(title=ol_book.title,
                          author=ol_book.primary_author.name)
    LOGGER.debug(
        f"Work found from search using:"
        f"{ol_book.primary_author.name}: {ol_book.title}\n"
        f"{work}"
    )
    try:
        identifiers = get_identifiers(work.identifiers)
        olid = identifiers["olid"][0]
    except Exception:
        # Best effort: any failure to read an olid from the result counts as
        # "not found". ``Exception`` (instead of a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        olid = ""
        work = {}
    return olid, work
def setUp(self):
    """Create a client using the 'credentials' entry of the 'openlibrary' config section."""
    credentials = Config().get_config()['openlibrary'].get('credentials')
    self.ol = OpenLibrary(credentials=credentials)
def setUp(self, mock_login):
    """Give every test a fresh client on ``self.ol``.

    ``mock_login`` is accepted but not used in the body.
    """
    client = OpenLibrary()
    self.ol = client
import re

from olclient.openlibrary import OpenLibrary

ol = OpenLibrary()


def get_type(olid):
    """Return 'author', 'book' or 'work' for an Open Library ID.

    Every run of digits in ``olid`` is replaced by '..' and the resulting
    pattern is looked up; e.g. 'OL123A' becomes 'OL..A', which maps to
    'author'. Raises KeyError when the pattern is not one of the three
    known ones.
    """
    ol_types = {'OL..A': 'author', 'OL..M': 'book', 'OL..W': 'work'}
    # Raw string: '\d' in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions
    kind = re.sub(r'\d+', '..', olid)
    return ol_types[kind]


def full_key(olid):
    """Return the key '/<type>s/<olid>' for an Open Library ID."""
    return "/%ss/%s" % (get_type(olid), olid)


def full_url(olid):
    """Return '<base_url><full key>.json' using the module level client's base_url."""
    return "%s%s.json" % (ol.base_url, full_key(olid))
# iterate through archive.org items with only openlibrary # and write back openlibrary_edition and openlibrary_work import json import sys import time import requests from internetarchive import modify_metadata from olclient.openlibrary import OpenLibrary fname = sys.argv[1] ol = OpenLibrary() n = 0 with open(fname, 'r') as f: for line in f.readlines(): data = json.loads(line) olid = data['openlibrary'] ocaid = data['identifier'] try: e = ol.get(olid) wolid = e.work.olid assert wolid except requests.exceptions.HTTPError as e: print('404', olid, ocaid) wolid = None to_write = { 'openlibrary_edition': olid } if wolid:
# Make the parent directory importable before olclient is imported
import sys
sys.path.insert(0, '../')

# Open Library client and the shared Book / Author types
from olclient.openlibrary import OpenLibrary
import olclient.common as common

# Open Library client
ol = OpenLibrary()

# Describe the book that is going to be created
book = common.Book(
    title="Warlight: A novel",
    authors=[common.Author(name="Michael Ondaatje")],
    publisher="Deckle Edge",
    publish_date="2018",
)

# Identifiers: ISBN 10 first, then ISBN 13
book.add_id('isbn_10', '0525521194')
book.add_id('isbn_13', '978-0525521198')

# Create the book on Open Library
new_book = ol.create_book(book)

# Attach a cover image to the created record
new_book.add_bookcover(
    'https://images-na.ssl-images-amazon.com/images/I/51kmM%2BvVRJL._SX337_BO1,204,203,200_.jpg'
)
class TestOpenLibrary(unittest.TestCase):
    """Tests for the OpenLibrary client in which HTTP calls are mocked.

    ``requests.Session.get`` / ``requests.Session.post`` are patched per
    test, so the assertions check the URLs requested and how the mocked
    JSON payloads are turned into client objects.
    """

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        # OpenLibrary.login is replaced by a mock while the client is built
        self.ol = OpenLibrary()

    @patch('requests.Session.get')
    def test_get_olid_by_isbn(self, mock_get):
        isbn_key = 'ISBN:0374202915'
        isbn_bibkeys = {
            isbn_key: {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        mock_get.return_value.json.return_value = isbn_bibkeys
        olid = self.ol.Edition.get_olid_by_isbn('0374202915')
        mock_get.assert_called_with(
            f"{self.ol.base_url}/api/books.json?bibkeys={isbn_key}"
        )
        expected_olid = 'OL23575801M'
        self.assertEqual(
            olid, expected_olid, f"Expected olid {expected_olid}, got {olid}"
        )

    @patch('requests.Session.get')
    def test_get_olid_notfound_by_bibkey(self, mock_get):
        # NOTE(review): this only sets a 'json_data' attribute on the mock;
        # json() itself is not configured here - confirm this is intended
        mock_get.json_data = {}
        edition = self.ol.Edition.get(isbn='foobar')
        self.assertIsNone(edition)

    @patch('requests.Session.get')
    def test_get_work_by_metadata(self, mock_get):
        doc = {
            "key": "/works/OL2514747W",
            "title": "The Autobiography of Benjamin Franklin",
        }
        search_results = {'start': 0, 'num_found': 1, 'docs': [doc]}
        title = "The Autobiography of Benjamin Franklin"
        mock_get.return_value.json.return_value = search_results
        book = self.ol.Work.search(title=title)
        mock_get.assert_called_with(f"{self.ol.base_url}/search.json?title={title}")
        canonical_title = book.canonical_title
        self.assertIn(
            'franklin',
            canonical_title,
            "Expected 'franklin' to appear in result title: %s" % canonical_title,
        )

    @patch('requests.Session.get')
    def test_get_edition_by_isbn(self, mock_get):
        isbn_lookup_response = {
            'ISBN:0374202915': {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        edition_response = {'key': "/books/OL23575801M", 'title': 'test'}
        # First json() call answers the ISBN lookup, the second the edition fetch
        mock_get.return_value.json.side_effect = [
            isbn_lookup_response,
            edition_response,
        ]
        book = self.ol.Edition.get(isbn='0374202915')
        mock_get.assert_has_calls(
            [
                call(f"{self.ol.base_url}/api/books.json?bibkeys=ISBN:0374202915"),
                call().raise_for_status(),
                call().json(),
                call(f"{self.ol.base_url}/books/OL23575801M.json"),
                call().raise_for_status(),
                call().json(),
            ]
        )
        expected_olid = 'OL23575801M'
        self.assertEqual(
            book.olid,
            expected_olid,
            f"Expected olid {expected_olid}, got {book.olid}",
        )

    @patch('requests.Session.get')
    def test_matching_authors_olid(self, mock_get):
        author_autocomplete = [
            {'name': "Benjamin Franklin", 'key': "/authors/OL26170A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        name = 'Benjamin Franklin'
        got_olid = self.ol.Author.get_olid_by_name(name)
        expected_olid = 'OL26170A'
        self.assertEqual(
            got_olid, expected_olid, f"Expected olid {expected_olid}, got {got_olid}"
        )

    @patch('requests.Session.get')
    def test_create_book(self, mock_get):
        book = Book(
            publisher='Karamanolis',
            title='Alles ber Mikrofone',
            identifiers={'isbn_10': ['3922238246']},
            publish_date=1982,
            authors=[Author(name='Karl Schwarzer')],
            publish_location='Neubiberg bei Mnchen',
        )
        author_autocomplete = [{'name': "Karl Schwarzer", 'key': "/authors/OL7292805A"}]
        mock_get.return_value.json.return_value = author_autocomplete
        got_result = self.ol.create_book(book, debug=True)
        mock_get.assert_called_with(
            "{}/authors/_autocomplete?q={}&limit=1".format(
                self.ol.base_url, "Karl Schwarzer"
            )
        )
        expected_result = {
            '_save': '',
            'author_key': '/authors/OL7292805A',
            'author_name': 'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': '3922238246',
            'publish_date': 1982,
            'publisher': 'Karamanolis',
            'title': 'Alles ber Mikrofone',
        }
        self.assertEqual(
            got_result,
            expected_result,
            "Expected create_book to return %s, got %s"
            % (expected_result, got_result),
        )

    def test_get_work(self):
        work_json = {'title': 'All Quiet on the Western Front'}
        work = self.ol.Work('OL12938932W', **work_json)
        self.assertEqual(
            work.title.lower(),
            'all quiet on the western front',
            "Failed to retrieve work",
        )

    def test_work_json(self):
        authors = [
            {"type": "/type/author_role", "author": {"key": "/authors/OL5864762A"}}
        ]
        work = self.ol.Work('OL12938932W', key='/works/OL12938932W', authors=authors)
        work_json = work.json()
        self.assertEqual(work_json['key'], "/works/OL12938932W")
        self.assertEqual(
            work_json['authors'][0]['author']['key'], "/authors/OL5864762A"
        )

    def test_work_validation(self):
        work = self.ol.Work(
            'OL123W',
            title='Test Title',
            type={'key': '/type/work'},
            revision=1,
            last_modified={
                'type': '/type/datetime',
                'value': '2016-10-12T00:48:04.453554',
            },
        )
        self.assertIsNone(work.validate())

    def test_edition_json(self):
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            authors=[author],
        )
        edition_json = edition.json()
        self.assertEqual(edition_json['key'], "/books/OL123M")
        self.assertEqual(edition_json['works'][0], {'key': '/works/OL123W'})
        self.assertEqual(edition_json['authors'][0], {'key': '/authors/OL123A'})
        self.assertNotIn('work_olid', edition_json)
        self.assertNotIn(
            'cover',
            edition_json,
            "'cover' is not a valid Edition property, should be list: 'covers'",
        )

    def test_edition_validation(self):
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            type={'key': '/type/edition'},
            revision=1,
            last_modified={
                'type': '/type/datetime',
                'value': '2016-10-12T00:48:04.453554',
            },
            authors=[author],
        )
        self.assertIsNone(edition.validate())
        # An edition without a work olid is expected to fail validation
        orphaned_edition = self.ol.Edition(
            edition_olid='OL123M', work_olid=None, title='Test Title', authors=[author]
        )
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            orphaned_edition.validate()

    @patch('requests.Session.get')
    def test_get_notfound(self, mock_get):
        # This tests that if requests.raise_for_status() raises an exception,
        # (e.g. 404 or 500 HTTP response) it is not swallowed by the client.
        mock_get.return_value.raise_for_status = raise_http_error
        suffixes = {'edition': 'M', 'work': 'W', 'author': 'A'}
        for _type, suffix in suffixes.items():
            target = "OLnotfound%s" % suffix
            with pytest.raises(requests.HTTPError):
                self.ol.get(target)
                # Only reached when get() did not raise
                pytest.fail(f"HTTPError not raised for {_type}: {target}")

    @patch('requests.Session.post')
    def test_save_many(self, mock_post):
        edition = self.ol.Edition(
            edition_olid='OL123M', work_olid='OL12W', title='minimal edition'
        )
        work = self.ol.Work(olid='OL12W', title='minimal work')
        self.ol.save_many([edition, work], "test comment")
        mock_post.assert_called_with(
            "%s/api/save_many" % self.ol.base_url, ANY, headers=ANY
        )
        # Second positional argument of the POST is the JSON body
        called_with_json = json.loads(mock_post.call_args[0][1])
        called_with_headers = mock_post.call_args[1]['headers']
        self.assertEqual(len(called_with_json), 2)
        self.assertIn('ns=42', called_with_headers['Opt'])
        self.assertEqual('test comment', called_with_headers['42-comment'])

    def test_delete(self):
        delete = self.ol.Delete('OL1W')
        self.assertEqual(delete.olid, 'OL1W')
        self.assertEqual('/type/delete', delete.json()['type']['key'])
        self.assertEqual('/works/OL1W', delete.json()['key'])

    def test_redirect(self):
        redirect = self.ol.Redirect(f='OL1W', t='OL2W')
        self.assertEqual('/type/redirect', redirect.json()['type']['key'])
        self.assertIn('location', redirect.json())
parser.add_argument('-f', '--file', help='Bulk MARC file to import') parser.add_argument('-l', '--limit', help='Number of records to import', type=int, default=1) parser.add_argument('-o', '--offset', help='Offset in BYTES from which to start importing', type=int, default=0) args = parser.parse_args() item = args.item fname = args.file if args.info: # List MARC21 files, then quit. for f in get_marc21_files(item): print(f) exit() if LIVE: ol = OpenLibrary() #ol = OpenLibrary(base_url='https://dev.openlibrary.org') else: local_dev = 'http://*****:*****@example.com', 'admin123') ol = OpenLibrary(base_url=local_dev, credentials=c) limit = args.limit # if non-zero, a limit to only process this many records from each file count = 0 print('FILENAME: %s' % fname) offset = args.offset length = 5 # we only need to get the length of the first record (first 5 bytes), the API will seek to the end. while length: if limit and count >= limit:
#!/usr/bin/env python3 import json from olclient.openlibrary import OpenLibrary ol = OpenLibrary() infile = "olids-to-update.txt" # Takes an infile and writes ocaids to Open Library items and performs a sync. # infile is the json output of an archive.org search query # containing 'openlibrary' (edition olid) and 'identifier' (ocaid) fields def sync_ol_to_ia(olid): r = ol.session.get(ol.base_url + "/admin/sync?edition_id=" + olid) if r.status_code == 500: content = {'error': 'HTTP 500'} else: content = r.json() if 'error' in content and 'no changes to _meta.xml' not in content[ 'error']: # and r.json()['error'] == 'No qualifying edition': print("%s, %s: %s" % (olid, ocaid, content)) return r.status_code # start and end are False or line numbers in infile to begin and stop processing # Used in case there is a need to resume or re-run part of a batch. start = False end = False with open(infile) as f: for count, line in enumerate(f):
parser.print_help(sys.stderr) sys.exit(1) args = parser.parse_args() item = args.item fname = args.file local_testing = args.local dev_testing = args.testing staging_testing = args.staging barcode = args.barcode if local_testing: Credentials = namedtuple('Credentials', ['username', 'password']) local_dev = 'http://*****:*****@example.com', 'admin123') ol = OpenLibrary(base_url=local_dev, credentials=c) elif staging_testing: ol = OpenLibrary(base_url='https://staging.openlibrary.org') elif dev_testing: ol = OpenLibrary(base_url='https://testing.openlibrary.org') else: ol = OpenLibrary() print('Importing to %s' % ol.base_url) print('ITEM: %s' % item) print('FILENAME: %s' % fname) if args.info: if barcode is True: # display available local_ids print('Available local_ids to import:')
class TestOpenLibrary(unittest.TestCase):
    """Tests that run the OpenLibrary client with configured credentials.

    Nothing is mocked in this class; the client is built from the
    'credentials' entry of the 'openlibrary' config section.
    """

    def setUp(self):
        ol_config = Config().get_config()['openlibrary']
        ol_creds = ol_config.get('credentials')
        self.ol = OpenLibrary(credentials=ol_creds)

    def test_get_olid_by_isbn(self):
        olid = self.ol.Edition.get_olid_by_isbn(u'0374202915')
        expected_olid = u'OL23575801M'
        self.assertEqual(olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, olid))

    def test_get_work_by_metadata(self):
        title = u"The Autobiography of Benjamin Franklin"
        book = self.ol.Work.get_by_metadata(title=title)
        canonical_title = book.canonical_title
        self.assertIn('franklin', canonical_title,
                      "Expected 'franklin' to appear in result title: %s" %
                      canonical_title)

    def test_get_edition_by_isbn(self):
        book = self.ol.Edition.get(isbn=u'0374202915')
        expected_olid = u'OL23575801M'
        self.assertEqual(
            book.olid, expected_olid,
            "Expected olid %s, got %s" % (expected_olid, book.olid))

    def test_matching_authors_olid(self):
        name = u'Benjamin Franklin'
        got_olid = self.ol.Author.get_olid_by_name(name)
        expected_olid = u'OL26170A'
        self.assertEqual(got_olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, got_olid))

    def test_create_book(self):
        book = Book(publisher=u'Karamanolis',
                    title=u'Alles ber Mikrofone',
                    identifiers={'isbn_10': [u'3922238246']},
                    publish_date=1982,
                    authors=[Author(name=u'Karl Schwarzer')],
                    publish_location=u'Neubiberg bei Mnchen')
        got_result = self.ol.create_book(book, debug=True)
        expected_result = {
            '_save': '',
            'author_key': u'/authors/OL7292805A',
            'author_name': u'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': u'3922238246',
            'publish_date': 1982,
            'publisher': u'Karamanolis',
            'title': u'Alles ber Mikrofone'
        }
        # The message arguments follow the wording: expected first, got second
        # (they were previously passed the other way round)
        self.assertEqual(got_result, expected_result,
                         "Expected create_book to return %s, got %s"
                         % (expected_result, got_result))

    def test_get_work(self):
        work = self.ol.Work.get(u'OL12938932W')
        self.assertEqual(work.title.lower(), 'all quiet on the western front',
                         "Failed to retrieve work")

    def test_cli(self):
        # The same edition is fetched by ISBN and by olid; both results are
        # encoded with jsonpickle and compared against this expected JSON
        expected = json.loads(
            """{"subtitle": "a modern approach", "series": ["Prentice Hall series in artificial intelligence"], "covers": [92018], "lc_classifications": ["Q335 .R86 2003"], "latest_revision": 6, "contributions": ["Norvig, Peter."], "py/object": "olclient.openlibrary.Edition", "edition_name": "2nd ed.", "title": "Artificial intelligence", "_work": null, "languages": [{"key": "/languages/eng"}], "subjects": ["Artificial intelligence."], "publish_country": "nju", "by_statement": "Stuart J. Russell and Peter Norvig ; contributing writers, John F. Canny ... [et al.].", "type": {"key": "/type/edition"}, "revision": 6, "cover_url": "", "last_modified": {"type": "/type/datetime", "value": "2010-08-03T18:56:51.333942"}, "authors": [{"py/object": "olclient.openlibrary.Author", "bio": "", "name": "Stuart J. Russell", "links": [], "created": "2008-04-01T03:28:50.625462", "identifiers": {}, "alternate_names": ["Stuart; Norvig, Peter Russell"], "birth_date": "", "olid": null}], "publish_places": ["Upper Saddle River, N.J"], "pages": 1080, "publisher": ["Prentice Hall/Pearson Education"], "pagination": "xxviii, 1080 p. :", "work_olid": "OL2896994W", "created": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "dewey_decimal_class": ["006.3"], "notes": {"type": "/type/text", "value": "Includes bibliographical references (p. 987-1043) and index."}, "identifiers": {"librarything": ["43569"], "goodreads": ["27543"]}, "cover": "", "publish_date": "2003", "olid": "OL3702561M"}"""
        )
        actual = json.loads(
            jsonpickle.encode(self.ol.Edition.get(isbn=u'0137903952')))
        self.assertEqual(
            actual, expected,
            "Data didn't match: \n%s\n\nversus:\n\n %s" % (actual, expected))
        actual = json.loads(
            jsonpickle.encode(self.ol.Edition.get(olid=u'OL3702561M')))
        self.assertEqual(
            actual, expected,
            "Data didn't match: %s\n\nversus:\n\n %s" % (actual, expected))
#!/usr/bin/env python from copy import copy import sys from olclient.openlibrary import OpenLibrary """ Removes 'fake' ex-system subjects from Open Library works or editions. Takes as CLI argument a filename containing a list of Open Library keys: e.g. /works/OL1001319W /books/OL24710466M """ ol = OpenLibrary() inlist = sys.argv[1] fakes = [ 'overdrive', 'in library', 'accessible book', 'protected daisy', 'lending library', 'internet archive wishlist' ] # only remove these from works: wfakes = ['large type books', 'popular print disabled books'] otherbad = ['fictiion'] fakes += otherbad changes_made = 0 with open(inlist, 'r') as f: for item in f: olid = item.strip().replace('/books/', '').replace('/works/', '') book = ol.get(olid)
from lxml import etree import xmltodict from zipfile import ZipFile import urllib2 from io import BytesIO import isbnlib # using open library api from flask_application import app from olclient.openlibrary import OpenLibrary from collections import namedtuple Credentials = namedtuple('Credentials', ['username', 'password']) open_library = OpenLibrary(credentials=Credentials(app.config['OL_USERNAME'], app.config['OL_PASSWORD'])) # opf writing DC = "http://purl.org/dc/elements/1.1/" DCNS = "{http://purl.org/dc/elements/1.1/}" OPF = 'http://www.idpf.org/2007/opf' # Caching def cached(app, timeout=5 * 60, key='view/%s'): '''http://flask.pocoo.org/docs/patterns/viewdecorators/#caching-decorator''' def decorator(f): @wraps(f) def decorated_function(*args, **kwargs): cache_key = key % request.path
def setUp(self, mock_login):
    """Create the client plus the lists of truthy and falsey strings.

    ``mock_login`` is accepted but not used in the body.
    """
    self.ol = OpenLibrary()
    self.truthy_values, self.falsey_values = (
        ['yes', 'true', 't', 'y', '1'],
        ['no', 'false', 'f', 'n', '0'],
    )
import sys from olclient.openlibrary import OpenLibrary start = 19750 limit = 50 def is_work(thing): return thing.type['key'] == '/type/work' if __name__ == '__main__': offset = int(sys.argv[1]) ol = OpenLibrary() pos = start + offset * limit print('THIS POS', pos) #print('Workbot works w/o editions fixer') current_page = ol.session.get( ol.base_url + '/recentchanges.json?author=/people/WorkBot&offset=%d&limit=%d' % (pos, limit)) i = 0 if current_page.status_code == 200: page = current_page.json() first_date = page[0]['timestamp'][:10] print('DATE', first_date) for p in page: if p.get('comment') == 'merge works':
help= 'Import to dev.openlibrary.org Open Library dev instance for testing', action='store_true') args = parser.parse_args() item = args.item fname = args.file local_testing = args.local dev_testing = args.dev barcode = args.barcode if local_testing: Credentials = namedtuple('Credentials', ['username', 'password']) local_dev = 'http://*****:*****@example.com', 'admin123') ol = OpenLibrary(base_url=local_dev, credentials=c) elif dev_testing: ol = OpenLibrary(base_url='https://dev.openlibrary.org') else: ol = OpenLibrary() print('Importing to %s' % ol.base_url) print('ITEM: %s' % item) print('FILENAME: %s' % fname) if args.info: if barcode is True: # display available local_ids print('Available local_ids to import:') r = ol.session.get(ol.base_url + '/local_ids.json') print(LOCAL_ID.findall(r.json()['body']['value']))
# Using the Open Library Client from olclient.openlibrary import OpenLibrary import olclient.common as common import ndjson # Import os to check for file exist and # urllib to download the file import os import urllib.request # File used in the whole script FILE = 'data/wish_list_march_2018.ndjson' # Creating an object of the Open Library Client ol = OpenLibrary() # Check if a directory called data exists if not os.path.isdir("data"): os.mkdir('data') # If the required file is not available download the file if not os.path.exists(FILE): file_name = FILE urllib.request.urlretrieve( 'https://archive.org/download/openlibrary-bots/wish_list_march_2018.ndjson', file_name) def row2book(new_book): # Data of the book
class TestOpenLibrary(unittest.TestCase):
    """Tests for the OpenLibrary client in which HTTP calls are mocked.

    ``requests.Session.get`` / ``requests.Session.post`` are patched per
    test, so the assertions check the URLs requested and how the mocked
    JSON payloads are turned into client objects.
    """

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        # OpenLibrary.login is replaced by a mock while the client is built
        self.ol = OpenLibrary()

    @patch('requests.Session.get')
    def test_get_olid_by_isbn(self, mock_get):
        isbn_key = 'ISBN:0374202915'
        isbn_bibkeys = {
            isbn_key: {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        mock_get.return_value.json.return_value = isbn_bibkeys
        olid = self.ol.Edition.get_olid_by_isbn(u'0374202915')
        mock_get.assert_called_with("%s/api/books.json?bibkeys=%s" %
                                    (self.ol.base_url, isbn_key))
        expected_olid = u'OL23575801M'
        self.assertEqual(olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, olid))

    @patch('requests.Session.get')
    def test_get_olid_notfound_by_bibkey(self, mock_get):
        # NOTE(review): this only sets a 'json_data' attribute on the mock;
        # json() itself is not configured here - confirm this is intended
        mock_get.json_data = {}
        edition = self.ol.Edition.get(isbn='foobar')
        self.assertIsNone(edition)

    @patch('requests.Session.get')
    def test_get_work_by_metadata(self, mock_get):
        doc = {
            "key": u"/works/OL2514747W",
            "title": u"The Autobiography of Benjamin Franklin",
        }
        search_results = {
            'start': 0,
            'num_found': 1,
            'docs': [doc]
        }
        title = u"The Autobiography of Benjamin Franklin"
        mock_get.return_value.json.return_value = search_results
        book = self.ol.Work.search(title=title)
        mock_get.assert_called_with("%s/search.json?title=%s" %
                                    (self.ol.base_url, title))
        canonical_title = book.canonical_title
        self.assertIn('franklin', canonical_title,
                      "Expected 'franklin' to appear in result title: %s" %
                      canonical_title)

    @patch('requests.Session.get')
    def test_get_edition_by_isbn(self, mock_get):
        isbn_lookup_response = {
            u'ISBN:0374202915': {
                'info_url': u'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        edition_response = {
            'key': u"/books/OL23575801M",
            'title': 'test'
        }
        # First json() call answers the ISBN lookup, the second the edition fetch
        mock_get.return_value.json.side_effect = [isbn_lookup_response,
                                                  edition_response]
        book = self.ol.Edition.get(isbn=u'0374202915')
        mock_get.assert_has_calls([
            call("%s/api/books.json?bibkeys=ISBN:0374202915" % self.ol.base_url),
            call().raise_for_status(),
            call().json(),
            call("%s%s.json" % (self.ol.base_url, "/books/OL23575801M")),
            call().raise_for_status(),
            call().json()
        ])
        expected_olid = u'OL23575801M'
        self.assertEqual(book.olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, book.olid))

    @patch('requests.Session.get')
    def test_matching_authors_olid(self, mock_get):
        author_autocomplete = [
            {'name': u"Benjamin Franklin", 'key': u"/authors/OL26170A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        name = u'Benjamin Franklin'
        got_olid = self.ol.Author.get_olid_by_name(name)
        expected_olid = u'OL26170A'
        self.assertEqual(got_olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, got_olid))

    @patch('requests.Session.get')
    def test_create_book(self, mock_get):
        book = Book(publisher=u'Karamanolis',
                    title=u'Alles ber Mikrofone',
                    identifiers={'isbn_10': [u'3922238246']},
                    publish_date=1982,
                    authors=[Author(name=u'Karl Schwarzer')],
                    publish_location=u'Neubiberg bei Mnchen')
        author_autocomplete = [
            {'name': u"Karl Schwarzer", 'key': u"/authors/OL7292805A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        got_result = self.ol.create_book(book, debug=True)
        mock_get.assert_called_with("%s/authors/_autocomplete?q=%s&limit=1" %
                                    (self.ol.base_url, "Karl Schwarzer"))
        expected_result = {
            '_save': '',
            'author_key': u'/authors/OL7292805A',
            'author_name': u'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': u'3922238246',
            'publish_date': 1982,
            'publisher': u'Karamanolis',
            'title': u'Alles ber Mikrofone'
        }
        self.assertEqual(got_result, expected_result,
                         "Expected create_book to return %s, got %s"
                         % (expected_result, got_result))

    def test_get_work(self):
        work_json = {u'title': u'All Quiet on the Western Front'}
        work = self.ol.Work(u'OL12938932W', **work_json)
        self.assertEqual(work.title.lower(), 'all quiet on the western front',
                         "Failed to retrieve work")

    def test_work_json(self):
        authors = [{
            "type": "/type/author_role",
            "author": {"key": "/authors/OL5864762A"}
        }]
        work = self.ol.Work('OL12938932W',
                            key='/works/OL12938932W',
                            authors=authors)
        work_json = work.json()
        self.assertEqual(work_json['key'], "/works/OL12938932W")
        self.assertEqual(work_json['authors'][0]['author']['key'],
                         "/authors/OL5864762A")

    def test_work_validation(self):
        work = self.ol.Work('OL123W',
                            title='Test Title',
                            type={'key': '/type/work'},
                            revision=1,
                            last_modified={
                                'type': '/type/datetime',
                                'value': '2016-10-12T00:48:04.453554'
                            })
        self.assertIsNone(work.validate())

    def test_edition_json(self):
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(edition_olid='OL123M',
                                  work_olid='OL123W',
                                  title='Test Title',
                                  authors=[author])
        edition_json = edition.json()
        self.assertEqual(edition_json['key'], "/books/OL123M")
        self.assertEqual(edition_json['works'][0], {'key': '/works/OL123W'})
        self.assertEqual(edition_json['authors'][0], {'key': '/authors/OL123A'})
        self.assertNotIn('work_olid', edition_json)
        self.assertNotIn('cover', edition_json,
                         "'cover' is not a valid Edition property, should be list: 'covers'")

    def test_edition_validation(self):
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(edition_olid='OL123M',
                                  work_olid='OL123W',
                                  title='Test Title',
                                  type={'key': '/type/edition'},
                                  revision=1,
                                  last_modified={
                                      'type': '/type/datetime',
                                      'value': '2016-10-12T00:48:04.453554'
                                  },
                                  authors=[author])
        self.assertIsNone(edition.validate())
        # An edition without a work olid is expected to fail validation
        orphaned_edition = self.ol.Edition(edition_olid='OL123M',
                                           work_olid=None,
                                           title='Test Title',
                                           authors=[author])
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            orphaned_edition.validate()

    @patch('requests.Session.get')
    def test_get_notfound(self, mock_get):
        # This tests that if requests.raise_for_status() raises an exception,
        # (e.g. 404 or 500 HTTP response) it is not swallowed by the client.
        mock_get.return_value.raise_for_status = raise_http_error
        suffixes = {'edition': 'M', 'work': 'W', 'author': 'A'}
        for _type, suffix in suffixes.items():
            target = "OLnotfound%s" % suffix
            # pytest.raises() no longer accepts a 'message' argument, so the
            # custom failure text is produced by an explicit pytest.fail()
            # that is only reached when get() did not raise
            with pytest.raises(requests.HTTPError):
                self.ol.get(target)
                pytest.fail("HTTPError not raised for %s: %s" % (_type, target))

    @patch('requests.Session.post')
    def test_save_many(self, mock_post):
        edition = self.ol.Edition(edition_olid='OL123M',
                                  work_olid='OL12W',
                                  title='minimal edition')
        work = self.ol.Work(olid='OL12W', title='minimal work')
        self.ol.save_many([edition, work], "test comment")
        mock_post.assert_called_with("%s/api/save_many" % self.ol.base_url,
                                     ANY, headers=ANY)
        # Second positional argument of the POST is the JSON body
        called_with_json = json.loads(mock_post.call_args[0][1])
        called_with_headers = mock_post.call_args[1]['headers']
        self.assertEqual(len(called_with_json), 2)
        self.assertIn('ns=42', called_with_headers['Opt'])
        self.assertEqual('test comment', called_with_headers['42-comment'])

    def test_delete(self):
        delete = self.ol.Delete('OL1W')
        self.assertEqual(delete.olid, 'OL1W')
        self.assertEqual('/type/delete', delete.json()['type']['key'])
        self.assertEqual('/works/OL1W', delete.json()['key'])

    def test_redirect(self):
        redirect = self.ol.Redirect(f='OL1W', t='OL2W')
        self.assertEqual('/type/redirect', redirect.json()['type']['key'])
        self.assertIn('location', redirect.json())