Пример #1
0
def add_book(data):
    """Create a book on openlibrary.org from a metadata dict and return it.

    Reads the keys 'authors' (a ', '-separated string of names), 'title',
    'publisher', 'year', 'pages', 'isbn', 'cover' and 'categories' from
    ``data``.
    """
    # One Author object per name in the ', '-separated list
    author_objs = []
    for author_name in data['authors'].split(', '):
        author_objs.append(common.Author(name=author_name))

    draft = common.Book(
        title=data['title'],
        authors=author_objs,
        publisher=data['publisher'],
        publish_date=data['year'],
        pages=data['pages'],
    )

    # Register the ISBN under the identifier name matching its form;
    # an ISBN that is neither ISBN-10 nor ISBN-13 is not attached at all
    isbn = data['isbn']
    id_name = None
    if is_isbn10(isbn):
        id_name = 'isbn_10'
    elif is_isbn13(isbn):
        id_name = 'isbn_13'
    if id_name is not None:
        draft.add_id(id_name, isbn)

    # Create the record, then attach the cover and subjects and save
    created = OpenLibrary().create_book(draft)
    created.add_bookcover(data['cover'])
    created.work.add_subject(data['categories'])
    created.save()
    return created
Пример #2
0
    def __init__(self, ol=None, dry_run=True, limit=1):
        """Store the job settings and attach console and file log handlers.

        The job name is derived from ``sys.argv[0]`` with '.py' removed. It is
        used for the logger name ("jobs.<name>") and for the log directory
        ('logs/jobs/<name>'), which is created if missing.
        """
        self.ol = OpenLibrary() if ol is None else ol
        self.changed = 0
        self.dry_run = dry_run
        self.limit = limit

        job_name = sys.argv[0].replace('.py', '')
        formatter = logging.Formatter(
            '%(name)s;%(levelname)-8s;%(asctime)s %(message)s')

        self.logger = logging.getLogger("jobs.%s" % job_name)
        self.logger.setLevel(logging.DEBUG)

        # Console output is limited to warnings and above
        self.console_handler = logging.StreamHandler()
        self.console_handler.setLevel(logging.WARN)
        self.console_handler.setFormatter(formatter)
        self.logger.addHandler(self.console_handler)

        # The log file gets everything from DEBUG up, one timestamped file per run
        log_dir = 'logs/jobs/%s' % job_name
        makedirs(log_dir, exist_ok=True)
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        file_handler = logging.FileHandler(
            '%s/%s_%s.log' % (log_dir, job_name, timestamp))
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)
Пример #3
0
    def __init__(self,
                 ol=None,
                 dry_run=True,
                 limit=1,
                 job_name=__name__) -> None:
        """Parse the command line and set up the job's state and logger.

        Args:
            ol: Open Library client to use; a new ``OpenLibrary()`` is created
                when this is falsy.
            dry_run: Value used for ``self.dry_run`` when ``--dry-run`` is not
                given on the command line.
            limit: Value used for ``self.limit`` when ``--limit`` is not given
                on the command line.
            job_name: Name passed to ``self.setup_logger``.
        """
        self.ol = ol or OpenLibrary()

        self.parser = argparse.ArgumentParser(description=__doc__)
        self.parser.add_argument('-f',
                                 '--file',
                                 type=str,
                                 default=None,
                                 help='Path to file containing input data')
        # '-l' and '--limit' must be separate option strings; passed as one
        # string, argparse registers a single oddly named option and never
        # creates a 'limit' attribute on the parsed namespace.
        self.parser.add_argument(
            '-l',
            '--limit',
            type=int,
            default=None,
            help='Limit number of edits performed on external data. '
            'Set to zero to allow unlimited edits.')
        self.parser.add_argument(
            '-d',
            '--dry-run',
            type=self._str2bool,
            default=None,
            help='Execute the script without performing edits on external data.'
        )
        self.args = self.parser.parse_args()

        # argparse stores '--dry-run' under the attribute 'dry_run'. The
        # options default to None so that an explicit False / 0 from the
        # command line is honoured instead of being discarded by an `or`.
        cli_dry_run = self.args.dry_run
        cli_limit = self.args.limit
        self.dry_run = dry_run if cli_dry_run is None else cli_dry_run
        self.limit = limit if cli_limit is None else cli_limit
        self.changed = 0

        self.logger, self.console_handler = self.setup_logger(job_name)
 def setUp(self, mock_login):
     """Create the client and the string/text fixtures used by the tests.

     ``mock_login`` is not used in the body; presumably it is injected by a
     ``patch`` decorator outside this view -- TODO confirm.
     """
     self.ol = OpenLibrary()
     # 'description' and 'notes' given as plain strings
     self.strings = {'description': 'A String Description', 'notes': 'A String Note'}
     # The same two fields given as '/type/text' dicts
     self.texts = {
         'description': {'type': '/type/text', 'value': 'A Text Description'},
         'notes': {'type': '/type/text', 'value': 'A Text Note'},
     }
Пример #5
0
    def __init__(self, ol=None, write_changes=False, limit=1, job_name=__name__) -> None:
        """Parse the command line and set up the job's state and logger.

        Args:
            ol: Open Library client to use; a new ``OpenLibrary()`` is created
                when this is falsy.
            write_changes: When true, changes are written even if
                ``--write-changes`` is not given on the command line.
            limit: Value used for ``self.limit`` when ``--limit`` is not given
                on the command line.
            job_name: Name passed to ``self.setup_logger``.
        """
        self.ol = ol or OpenLibrary()

        self.parser = argparse.ArgumentParser(description=__doc__)
        self.parser.add_argument(
            '-f',
            '--file',
            type=str,
            default=None,
            help='Path to file containing input data',
        )
        # Default is None (not 1) so "option omitted" can be told apart from
        # an explicit value, including the documented `--limit 0`.
        self.parser.add_argument(
            '-l',
            '--limit',
            type=int,
            default=None,
            help='Limit number of edits performed on external data. '
            'Set to zero to allow unlimited edits.',
        )
        self.parser.add_argument(
            '-w',
            '--write-changes',
            action="store_true",
            help='Execute the script and write all changes to external data.',
        )
        self.args = self.parser.parse_args()
        self.write_changes = write_changes or self.args.write_changes
        # An explicit command-line limit (even 0) wins; otherwise fall back to
        # the constructor argument. `args.limit or limit` would discard 0.
        cli_limit = self.args.limit
        self.limit = limit if cli_limit is None else cli_limit
        self.changed = 0

        self.logger, self.console_handler = self.setup_logger(job_name)
Пример #6
0
def get_edition_from_work(olid):
    """Look up edition data for an Edition or Work olid and merge it.

    An olid ending in "M" is queried directly. An olid ending in "W" is
    resolved to the olids of the work's editions first. Each edition olid is
    passed to ``query`` and the results are merged into one dict, with later
    results overriding earlier keys.

    Raises:
        KeyError: if the olid ends in neither "M" nor "W".
    """
    if not olid.endswith(("M", "W")):
        raise KeyError("missing/wrong Work olid. Should end with 'M' or 'W'")

    if olid.endswith("M"):
        edition_olids = [olid]
    else:
        # Fetch the work and read the olid off each related Edition object
        work = OpenLibrary().Work.get(olid)
        edition_dicts = [vars(edition) for edition in work.editions]
        edition_olids = [entry["olid"] for entry in edition_dicts]

    # For simplicity each Edition olid is looked up via a request rather
    # than by parsing the client's Edition class
    merged = {}
    for edition_olid in edition_olids:
        merged.update(query(edition_olid, bibkey="OLID"))
    return merged
Пример #7
0
def main():
    """Find a book on Google Books and upload the chosen match to Open Library.

    Parses ``--query`` and ``--google_api_key`` from the command line and runs
    the query against the Google Books volumes API. If the first result
    matches the query as an ISBN it is uploaded directly; otherwise the
    candidates are printed and the user picks one by index.

    Returns:
        The result of ``_upload_ol_book`` for a direct ISBN match, otherwise
        ``None``.

    Raises:
        ValueError: if Google Books returns no results, or if the chosen
            index is not one of the listed candidates.
    """
    global OL
    OL = OpenLibrary()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--query",
        help="Your Google Books query (title, author, ISBN, etc)",
        required=True)
    parser.add_argument("--google_api_key",
                        help="Your Google API key",
                        required=True)
    args = parser.parse_args()

    google_books_service = build('books',
                                 'v1',
                                 developerKey=args.google_api_key)
    google_books_request = google_books_service.volumes().list(source='public',
                                                               q=args.query)
    google_books_response = google_books_request.execute()

    number_of_google_books = google_books_response["totalItems"]
    number_of_considered_books = min(number_of_google_books,
                                     NUMBER_OF_BOOK_CHOICES)
    if number_of_google_books == 0:
        raise ValueError(
            "The Google Books API returned no results for your query. Aborting."
        )

    google_books = google_books_response["items"]
    ol_books = _ol_books_from_google_books(google_books,
                                           number_of_considered_books)

    # If query is an ISBN and Google finds a match, go ahead and upload
    if _isbn_matches(ol_books[0], args.query):
        ol_book = _ol_book_from_google_book(google_books[0])
        return _upload_ol_book(ol_book)

    # Else, let the user choose from a list
    print(
        "Google Books found {} results for this query. Here are the first {}:".
        format(number_of_google_books, number_of_considered_books))
    for i, ol_book in enumerate(ol_books):
        isbn_10 = ol_book.identifiers["isbn_10"][0]
        print("\t{}: '{}' by {} - ISBN {}".format(i, ol_book.title,
                                                  ol_book.primary_author.name,
                                                  isbn_10))

    chosen_index = int(input("Which of these would you like to upload? "))
    # Listed indices run from 0 to number_of_considered_books - 1. An index
    # equal to the count is already out of range, and a negative one would
    # silently select from the end of the list.
    if not 0 <= chosen_index < number_of_considered_books:
        raise ValueError("Invalid choice. Aborting.")

    _upload_ol_book(ol_books[chosen_index])
Пример #8
0
def main():
    """Build a book from ``--title``/``--author`` and search Open Library for it.

    ``--author`` may hold several names separated by "; ". Returns the created
    book together with the result of ``search_book`` for it.
    """
    global OL
    OL = OpenLibrary()

    cli = argparse.ArgumentParser()
    cli.add_argument("--title", required=True)
    cli.add_argument("--author", default="", required=False)
    options = cli.parse_args()

    data = {
        "title": options.title,
        "authors": list(options.author.split("; ")),
    }
    book = create_book(data)
    return book, search_book(book)
def main():
    """Create the sample book 'Warlight: A novel' and attach a cover image."""
    # Client used for every Open Library call below
    client = OpenLibrary()

    # Describe the book to add
    sample = common.Book(
        title="Warlight: A novel",
        authors=[common.Author(name="Michael Ondaatje")],
        publisher="Deckle Edge",
        publish_date="2018",
    )

    # Attach both ISBN forms before the book is created
    isbn_ids = (
        ('isbn_10', '0525521194'),
        ('isbn_13', '978-0525521198'),
    )
    for id_name, id_value in isbn_ids:
        sample.add_id(id_name, id_value)

    created = client.create_book(sample)

    # Add a cover to the newly created book from an image URL
    cover_url = 'https://images-na.ssl-images-amazon.com/images/I/51kmM%2BvVRJL._SX337_BO1,204,203,200_.jpg'
    created.add_bookcover(cover_url)
Пример #10
0
class TestAuthors(unittest.TestCase):
    """Tests for Author objects created through the client."""

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        # OpenLibrary.login is patched while the client is constructed
        self.ol = OpenLibrary()

    def test_author_validation(self):
        """validate() returns None for an author with the fields given here."""
        last_modified = {
            'type': '/type/datetime',
            'value': '2016-10-12T00:48:04.453554',
        }
        author = self.ol.Author(
            'OL123A',
            name='Test Author',
            revision=1,
            last_modified=last_modified,
        )
        self.assertIsNone(author.validate())
Пример #11
0
def search_book(ol_book):
    """Search Open Library for a work by a book's title and primary author.

    Returns:
        A tuple ``(olid, work)``. When no olid can be read from the search
        result (for example because nothing was found), ``("", {})`` is
        returned instead.
    """

    OL = OpenLibrary()
    work = OL.Work.search(title=ol_book.title, author=ol_book.primary_author.name)

    LOGGER.debug(
        f"Work found from search using:"
        f"{ol_book.primary_author.name}: {ol_book.title}\n"
        f"{work}"
    )

    try:
        identifiers = get_identifiers(work.identifiers)
        olid = identifiers["olid"][0]
    except Exception:
        # Best-effort lookup: any failure to extract an olid means "no match".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate; the reason is logged so
        # unexpected errors are not hidden.
        LOGGER.debug("Could not extract an olid from the search result", exc_info=True)
        olid = ""
        work = {}

    return olid, work
Пример #12
0
 def setUp(self):
     """Create the client with credentials read from the 'openlibrary' config section."""
     ol_config = Config().get_config()['openlibrary']
     # The 'credentials' entry is looked up with .get() and passed straight to the client
     ol_creds = ol_config.get('credentials')
     self.ol = OpenLibrary(credentials=ol_creds)
 def setUp(self, mock_login):
     """Create the client used by the tests.

     ``mock_login`` is not used in the body; presumably it is injected by a
     ``patch`` decorator outside this view -- TODO confirm.
     """
     self.ol = OpenLibrary()
Пример #14
0
import re
from olclient.openlibrary import OpenLibrary

ol = OpenLibrary()


def get_type(olid):
    """Return the record type name for an olid: 'author', 'book' or 'work'.

    Every run of digits in the olid is replaced by '..' and the resulting
    pattern is looked up, e.g. 'OL123A' -> 'OL..A' -> 'author'.

    Raises:
        KeyError: if the olid does not reduce to one of the known patterns.
    """
    ol_types = {'OL..A': 'author', 'OL..M': 'book', 'OL..W': 'work'}
    # Raw string: '\d' in a plain string literal is an invalid escape sequence
    kind = re.sub(r'\d+', '..', olid)
    return ol_types[kind]


def full_key(olid):
    """Return the key path for an olid: '/<type>s/<olid>', with the type from get_type."""
    type_name = get_type(olid)
    return f"/{type_name}s/{olid}"


def full_url(olid):
    """Return the JSON URL for an olid: the client's base URL, the full key, then '.json'."""
    return f"{ol.base_url}{full_key(olid)}.json"
Пример #15
0
# iterate through archive.org items with only openlibrary
# and write back openlibrary_edition and openlibrary_work

import json
import sys
import time
import requests
from internetarchive import modify_metadata
from olclient.openlibrary import OpenLibrary

fname = sys.argv[1]

ol = OpenLibrary()

n = 0
with open(fname, 'r') as f:
   for line in f.readlines():
       data = json.loads(line)
       olid = data['openlibrary']
       ocaid = data['identifier']
       try: 
           e = ol.get(olid)
           wolid = e.work.olid
           assert wolid
       except requests.exceptions.HTTPError as e:
           print('404', olid, ocaid)
           wolid = None
       to_write = {
           'openlibrary_edition': olid
       }
       if wolid:
Пример #16
0
# Change the Python Run Time Path
import sys
sys.path.insert(0, '../')

# Import necessary libraries to use
from olclient.openlibrary import OpenLibrary
import olclient.common as common

# Create the Open Library client used for every call below
ol = OpenLibrary()

# Describe the book to add: title, author, publisher and publication year
book = common.Book(title="Warlight: A novel",
                   authors=[common.Author(name="Michael Ondaatje")],
                   publisher="Deckle Edge",
                   publish_date="2018")

# Attach both ISBN forms (ISBN 10 and ISBN 13) before the book is created
book.add_id('isbn_10', '0525521194')
book.add_id('isbn_13', '978-0525521198')

# Create the book through the client; the returned object is used for the cover
new_book = ol.create_book(book)

# Add a cover to the newly created book from an image URL
new_book.add_bookcover(
    'https://images-na.ssl-images-amazon.com/images/I/51kmM%2BvVRJL._SX337_BO1,204,203,200_.jpg'
)
class TestOpenLibrary(unittest.TestCase):
    """Unit tests for the OpenLibrary client with patched HTTP session calls."""

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        # OpenLibrary.login is patched while the client is constructed
        self.ol = OpenLibrary()

    @patch('requests.Session.get')
    def test_get_olid_by_isbn(self, mock_get):
        """An ISBN lookup returns the olid found in the bibkey's info_url."""
        isbn_key = 'ISBN:0374202915'
        isbn_bibkeys = {
            isbn_key: {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        mock_get.return_value.json.return_value = isbn_bibkeys
        olid = self.ol.Edition.get_olid_by_isbn('0374202915')
        mock_get.assert_called_with(
            f"{self.ol.base_url}/api/books.json?bibkeys={isbn_key}"
        )
        expected_olid = 'OL23575801M'
        self.assertEqual(
            olid, expected_olid, f"Expected olid {expected_olid}, got {olid}"
        )

    @patch('requests.Session.get')
    def test_get_olid_notfound_by_bibkey(self, mock_get):
        """Edition.get returns None when the bibkey lookup yields no entries."""
        # Configure the mocked response body. Assigning `mock_get.json_data`
        # only creates an attribute that nothing reads.
        mock_get.return_value.json.return_value = {}
        edition = self.ol.Edition.get(isbn='foobar')
        self.assertIsNone(edition)

    @patch('requests.Session.get')
    def test_get_work_by_metadata(self, mock_get):
        """Work.search by title requests search.json and returns the matching work."""
        doc = {
            "key": "/works/OL2514747W",
            "title": "The Autobiography of Benjamin Franklin",
        }
        search_results = {'start': 0, 'num_found': 1, 'docs': [doc]}
        title = "The Autobiography of Benjamin Franklin"
        mock_get.return_value.json.return_value = search_results
        book = self.ol.Work.search(title=title)
        mock_get.assert_called_with(f"{self.ol.base_url}/search.json?title={title}")
        canonical_title = book.canonical_title
        self.assertIn(
            'franklin',
            canonical_title,
            "Expected 'franklin' to appear in result title: %s" % canonical_title,
        )

    @patch('requests.Session.get')
    def test_get_edition_by_isbn(self, mock_get):
        """Edition.get(isbn=...) does the bibkey lookup, then fetches the edition."""
        isbn_lookup_response = {
            'ISBN:0374202915': {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        edition_response = {'key': "/books/OL23575801M", 'title': 'test'}
        # The first json() call answers the ISBN lookup, the second the edition fetch
        mock_get.return_value.json.side_effect = [
            isbn_lookup_response,
            edition_response,
        ]
        book = self.ol.Edition.get(isbn='0374202915')
        mock_get.assert_has_calls(
            [
                call("%s/api/books.json?bibkeys=ISBN:0374202915" % self.ol.base_url),
                call().raise_for_status(),
                call().json(),
                call("{}/books/OL23575801M.json".format(self.ol.base_url)),
                call().raise_for_status(),
                call().json(),
            ]
        )
        expected_olid = 'OL23575801M'
        self.assertEqual(
            book.olid,
            expected_olid,
            f"Expected olid {expected_olid}, got {book.olid}",
        )

    @patch('requests.Session.get')
    def test_matching_authors_olid(self, mock_get):
        """Author.get_olid_by_name returns the olid from the autocomplete result."""
        author_autocomplete = [
            {'name': "Benjamin Franklin", 'key': "/authors/OL26170A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        name = 'Benjamin Franklin'
        got_olid = self.ol.Author.get_olid_by_name(name)
        expected_olid = 'OL26170A'
        self.assertEqual(
            got_olid, expected_olid, f"Expected olid {expected_olid}, got {got_olid}"
        )

    @patch('requests.Session.get')
    def test_create_book(self, mock_get):
        """create_book(debug=True) returns the expected form data for the book."""
        book = Book(
            publisher='Karamanolis',
            title='Alles ber Mikrofone',
            identifiers={'isbn_10': ['3922238246']},
            publish_date=1982,
            authors=[Author(name='Karl Schwarzer')],
            publish_location='Neubiberg bei Mnchen',
        )
        author_autocomplete = [{'name': "Karl Schwarzer", 'key': "/authors/OL7292805A"}]
        mock_get.return_value.json.return_value = author_autocomplete
        got_result = self.ol.create_book(book, debug=True)
        mock_get.assert_called_with(
            "{}/authors/_autocomplete?q={}&limit=1".format(
                self.ol.base_url, "Karl Schwarzer"
            )
        )
        expected_result = {
            '_save': '',
            'author_key': '/authors/OL7292805A',
            'author_name': 'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': '3922238246',
            'publish_date': 1982,
            'publisher': 'Karamanolis',
            'title': 'Alles ber Mikrofone',
        }
        self.assertEqual(
            got_result,
            expected_result,
            "Expected create_book to return %s, got %s" % (expected_result, got_result),
        )

    def test_get_work(self):
        """A Work built from keyword arguments exposes its title."""
        work_json = {'title': 'All Quiet on the Western Front'}
        work = self.ol.Work('OL12938932W', **work_json)
        self.assertEqual(
            work.title.lower(),
            'all quiet on the western front',
            "Failed to retrieve work",
        )

    def test_work_json(self):
        """Work.json() keeps the key and the author references."""
        authors = [
            {"type": "/type/author_role", "author": {"key": "/authors/OL5864762A"}}
        ]
        work = self.ol.Work('OL12938932W', key='/works/OL12938932W', authors=authors)
        work_json = work.json()
        self.assertEqual(work_json['key'], "/works/OL12938932W")
        self.assertEqual(
            work_json['authors'][0]['author']['key'], "/authors/OL5864762A"
        )

    def test_work_validation(self):
        """validate() returns None for a work with the fields given here."""
        work = self.ol.Work(
            'OL123W',
            title='Test Title',
            type={'key': '/type/work'},
            revision=1,
            last_modified={
                'type': '/type/datetime',
                'value': '2016-10-12T00:48:04.453554',
            },
        )
        self.assertIsNone(work.validate())

    def test_edition_json(self):
        """Edition.json() emits key, works and authors, but no client-only fields."""
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            authors=[author],
        )
        edition_json = edition.json()
        self.assertEqual(edition_json['key'], "/books/OL123M")
        self.assertEqual(edition_json['works'][0], {'key': '/works/OL123W'})
        self.assertEqual(edition_json['authors'][0], {'key': '/authors/OL123A'})

        self.assertNotIn('work_olid', edition_json)
        self.assertNotIn(
            'cover',
            edition_json,
            "'cover' is not a valid Edition property, should be list: 'covers'",
        )

    def test_edition_validation(self):
        """A complete edition validates; one without a work raises ValidationError."""
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            type={'key': '/type/edition'},
            revision=1,
            last_modified={
                'type': '/type/datetime',
                'value': '2016-10-12T00:48:04.453554',
            },
            authors=[author],
        )
        self.assertIsNone(edition.validate())
        orphaned_edition = self.ol.Edition(
            edition_olid='OL123M', work_olid=None, title='Test Title', authors=[author]
        )
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            orphaned_edition.validate()

    @patch('requests.Session.get')
    def test_get_notfound(self, mock_get):
        """An HTTPError from raise_for_status() propagates out of the client."""
        # This tests that if requests.raise_for_status() raises an exception,
        # (e.g. 404 or 500 HTTP response) it is not swallowed by the client.
        mock_get.return_value.raise_for_status = raise_http_error
        suffixes = {'edition': 'M', 'work': 'W', 'author': 'A'}
        for _type, suffix in suffixes.items():
            target = "OLnotfound%s" % suffix
            with pytest.raises(requests.HTTPError):
                self.ol.get(target)
                pytest.fail(f"HTTPError not raised for {_type}: {target}")

    @patch('requests.Session.post')
    def test_save_many(self, mock_post):
        """save_many posts both documents to /api/save_many with the comment headers."""
        edition = self.ol.Edition(
            edition_olid='OL123M', work_olid='OL12W', title='minimal edition'
        )
        work = self.ol.Work(olid='OL12W', title='minimal work')
        self.ol.save_many([edition, work], "test comment")
        mock_post.assert_called_with(
            "%s/api/save_many" % self.ol.base_url, ANY, headers=ANY
        )
        # The second positional argument of the post call is the JSON payload
        called_with_json = json.loads(mock_post.call_args[0][1])
        called_with_headers = mock_post.call_args[1]['headers']
        self.assertEqual(len(called_with_json), 2)
        self.assertIn('ns=42', called_with_headers['Opt'])
        self.assertEqual('test comment', called_with_headers['42-comment'])

    def test_delete(self):
        """A Delete keeps its olid and serialises as a /type/delete document."""
        delete = self.ol.Delete('OL1W')
        self.assertEqual(delete.olid, 'OL1W')
        self.assertEqual('/type/delete', delete.json()['type']['key'])
        self.assertEqual('/works/OL1W', delete.json()['key'])

    def test_redirect(self):
        """A Redirect serialises as a /type/redirect document with a location."""
        redirect = self.ol.Redirect(f='OL1W', t='OL2W')
        self.assertEqual('/type/redirect', redirect.json()['type']['key'])
        self.assertIn('location', redirect.json())
        self.assertIn('location', redirect.json())
Пример #18
0
    parser.add_argument('-f', '--file', help='Bulk MARC file to import')
    parser.add_argument('-l', '--limit', help='Number of records to import', type=int, default=1)
    parser.add_argument('-o', '--offset', help='Offset in BYTES from which to start importing', type=int, default=0)

    args = parser.parse_args()
    item = args.item
    fname = args.file

    if args.info:
        # List MARC21 files, then quit.
        for f in get_marc21_files(item):
            print(f)
        exit()

    if LIVE:
        ol = OpenLibrary()
        #ol = OpenLibrary(base_url='https://dev.openlibrary.org')
    else:
        local_dev = 'http://*****:*****@example.com', 'admin123')
        ol = OpenLibrary(base_url=local_dev, credentials=c)

    limit = args.limit  # if non-zero, a limit to only process this many records from each file
    count = 0

    print('FILENAME: %s' % fname)
    offset = args.offset
    length = 5  # we only need to get the length of the first record (first 5 bytes), the API will seek to the end.

    while length:
        if limit and count >= limit:
Пример #19
0
#!/usr/bin/env python3
import json
from olclient.openlibrary import OpenLibrary
ol = OpenLibrary()

infile = "olids-to-update.txt"

# Takes an infile and writes ocaids to Open Library items and performs a sync.

# infile is the json output of an archive.org search query
# containing 'openlibrary' (edition olid) and 'identifier' (ocaid) fields


def sync_ol_to_ia(olid):
    """Request /admin/sync for an edition olid and return the HTTP status code.

    Any reported error other than 'no changes to _meta.xml' is printed with
    the olid and ``ocaid``. ``ocaid`` is not a parameter; it is read from the
    enclosing module scope (presumably set by the processing loop -- TODO
    confirm).
    """
    response = ol.session.get(ol.base_url + "/admin/sync?edition_id=" + olid)

    # A 500 response is replaced by a synthetic error payload rather than parsed
    if response.status_code == 500:
        content = {'error': 'HTTP 500'}
    else:
        content = response.json()

    if 'error' in content:
        if 'no changes to _meta.xml' not in content['error']:
            print("%s, %s: %s" % (olid, ocaid, content))
    return response.status_code


# start and end are False or line numbers in infile to begin and stop processing
# Used in case there is a need to resume or re-run part of a batch.
start = False
end = False
with open(infile) as f:
    for count, line in enumerate(f):
 def setUp(self, mock_login):
     """Create the client used by the tests.

     ``mock_login`` is not used in the body; presumably it is injected by a
     ``patch`` decorator outside this view -- TODO confirm.
     """
     self.ol = OpenLibrary()
Пример #21
0
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()
    item = args.item
    fname = args.file
    local_testing = args.local
    dev_testing = args.testing
    staging_testing = args.staging
    barcode = args.barcode

    if local_testing:
        Credentials = namedtuple('Credentials', ['username', 'password'])
        local_dev = 'http://*****:*****@example.com', 'admin123')
        ol = OpenLibrary(base_url=local_dev, credentials=c)
    elif staging_testing:
        ol = OpenLibrary(base_url='https://staging.openlibrary.org')
    elif dev_testing:
        ol = OpenLibrary(base_url='https://testing.openlibrary.org')
    else:
        ol = OpenLibrary()

    print('Importing to %s' % ol.base_url)
    print('ITEM: %s' % item)
    print('FILENAME: %s' % fname)

    if args.info:
        if barcode is True:
            # display available local_ids
            print('Available local_ids to import:')
Пример #22
0
class TestOpenLibrary(unittest.TestCase):
    """Tests for the OpenLibrary client built from configured credentials.

    No HTTP mocking is set up in this class, so these tests presumably talk to
    a real Open Library instance -- TODO confirm.
    """

    def setUp(self):
        ol_config = Config().get_config()['openlibrary']
        ol_creds = ol_config.get('credentials')
        self.ol = OpenLibrary(credentials=ol_creds)

    def test_get_olid_by_isbn(self):
        """An ISBN lookup returns the expected edition olid."""
        olid = self.ol.Edition.get_olid_by_isbn(u'0374202915')
        expected_olid = u'OL23575801M'
        self.assertEqual(olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, olid))

    def test_get_work_by_metadata(self):
        """A title lookup returns a work whose canonical title mentions 'franklin'."""
        title = u"The Autobiography of Benjamin Franklin"
        book = self.ol.Work.get_by_metadata(title=title)
        canonical_title = book.canonical_title
        self.assertIn(
            'franklin', canonical_title,
            "Expected 'franklin' to appear in result title: %s" %
            canonical_title)

    def test_get_edition_by_isbn(self):
        """Edition.get(isbn=...) returns the edition with the expected olid."""
        book = self.ol.Edition.get(isbn=u'0374202915')
        expected_olid = u'OL23575801M'
        self.assertEqual(
            book.olid, expected_olid,
            "Expected olid %s, got %s" % (expected_olid, book.olid))

    def test_matching_authors_olid(self):
        """An author name lookup returns the expected author olid."""
        name = u'Benjamin Franklin'
        got_olid = self.ol.Author.get_olid_by_name(name)
        expected_olid = u'OL26170A'
        self.assertEqual(got_olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, got_olid))

    def test_create_book(self):
        """create_book(debug=True) returns the expected form data for the book."""
        book = Book(publisher=u'Karamanolis',
                    title=u'Alles ber Mikrofone',
                    identifiers={'isbn_10': [u'3922238246']},
                    publish_date=1982,
                    authors=[Author(name=u'Karl Schwarzer')],
                    publish_location=u'Neubiberg bei Mnchen')
        got_result = self.ol.create_book(book, debug=True)
        expected_result = {
            '_save': '',
            'author_key': u'/authors/OL7292805A',
            'author_name': u'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': u'3922238246',
            'publish_date': 1982,
            'publisher': u'Karamanolis',
            'title': u'Alles ber Mikrofone'
        }
        # Message arguments follow the wording: expected value first, then actual
        self.assertEqual(got_result, expected_result,
                         "Expected create_book to return %s, got %s"
                         % (expected_result, got_result))

    def test_get_work(self):
        """Work.get returns the work with the expected title."""
        work = self.ol.Work.get(u'OL12938932W')
        self.assertEqual(work.title.lower(), 'all quiet on the western front',
                         "Failed to retrieve work")

    def test_cli(self):
        """The same edition fetched by ISBN and by olid encodes to the expected JSON."""
        expected = json.loads(
            """{"subtitle": "a modern approach", "series": ["Prentice Hall series in artificial intelligence"], "covers": [92018], "lc_classifications": ["Q335 .R86 2003"], "latest_revision": 6, "contributions": ["Norvig, Peter."], "py/object": "olclient.openlibrary.Edition", "edition_name": "2nd ed.", "title": "Artificial intelligence", "_work": null, "languages": [{"key": "/languages/eng"}], "subjects": ["Artificial intelligence."], "publish_country": "nju", "by_statement": "Stuart J. Russell and Peter Norvig ; contributing writers, John F. Canny ... [et al.].", "type": {"key": "/type/edition"}, "revision": 6, "cover_url": "", "last_modified": {"type": "/type/datetime", "value": "2010-08-03T18:56:51.333942"}, "authors": [{"py/object": "olclient.openlibrary.Author", "bio": "", "name": "Stuart J. Russell", "links": [], "created": "2008-04-01T03:28:50.625462", "identifiers": {}, "alternate_names": ["Stuart; Norvig, Peter Russell"], "birth_date": "", "olid": null}], "publish_places": ["Upper Saddle River, N.J"], "pages": 1080, "publisher": ["Prentice Hall/Pearson Education"], "pagination": "xxviii, 1080 p. :", "work_olid": "OL2896994W", "created": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "dewey_decimal_class": ["006.3"], "notes": {"type": "/type/text", "value": "Includes bibliographical references (p. 987-1043) and index."}, "identifiers": {"librarything": ["43569"], "goodreads": ["27543"]}, "cover": "", "publish_date": "2003", "olid": "OL3702561M"}"""
        )

        actual = json.loads(
            jsonpickle.encode(self.ol.Edition.get(isbn=u'0137903952')))
        self.assertEqual(
            actual, expected,
            "Data didn't match: \n%s\n\nversus:\n\n %s" % (actual, expected))
        actual = json.loads(
            jsonpickle.encode(self.ol.Edition.get(olid=u'OL3702561M')))
        self.assertEqual(
            actual, expected,
            "Data didn't match: %s\n\nversus:\n\n %s" % (actual, expected))
Пример #23
0
#!/usr/bin/env python
from copy import copy
import sys
from olclient.openlibrary import OpenLibrary
"""
   Removes 'fake' ex-system subjects from Open Library works or editions.
   Takes as CLI argument a filename containing a list of Open Library keys:
   e.g.
     /works/OL1001319W
     /books/OL24710466M
"""

ol = OpenLibrary()

inlist = sys.argv[1]

fakes = [
    'overdrive', 'in library', 'accessible book', 'protected daisy',
    'lending library', 'internet archive wishlist'
]
# only remove these from works:
wfakes = ['large type books', 'popular print disabled books']

otherbad = ['fictiion']

fakes += otherbad
changes_made = 0
with open(inlist, 'r') as f:
    for item in f:
        olid = item.strip().replace('/books/', '').replace('/works/', '')
        book = ol.get(olid)
Пример #24
0
from lxml import etree
import xmltodict

from zipfile import ZipFile
import urllib2
from io import BytesIO

import isbnlib

# using open library api
from flask_application import app
from olclient.openlibrary import OpenLibrary
from collections import namedtuple
Credentials = namedtuple('Credentials', ['username', 'password'])
open_library = OpenLibrary(credentials=Credentials(app.config['OL_USERNAME'],
                                                   app.config['OL_PASSWORD']))

# opf writing
DC = "http://purl.org/dc/elements/1.1/"
DCNS = "{http://purl.org/dc/elements/1.1/}"
OPF = 'http://www.idpf.org/2007/opf'

# Caching


def cached(app, timeout=5 * 60, key='view/%s'):
    '''http://flask.pocoo.org/docs/patterns/viewdecorators/#caching-decorator'''
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            cache_key = key % request.path
Пример #25
0
 def setUp(self, mock_login):
     """Create the client and the lists of truthy/falsey strings used by the tests.

     ``mock_login`` is not used in the body; presumably it is injected by a
     ``patch`` decorator outside this view -- TODO confirm.
     """
     self.ol = OpenLibrary()
     # String spellings the tests treat as true and as false, respectively
     self.truthy_values = ['yes', 'true', 't', 'y', '1']
     self.falsey_values = ['no', 'false', 'f', 'n', '0']
Пример #26
0
import sys

from olclient.openlibrary import OpenLibrary

start = 19750
limit = 50


def is_work(thing):
    """Return True when `thing.type['key']` equals '/type/work'."""
    type_key = thing.type['key']
    return type_key == '/type/work'


if __name__ == '__main__':
    offset = int(sys.argv[1])
    ol = OpenLibrary()
    pos = start + offset * limit
    print('THIS POS', pos)
    #print('Workbot works w/o editions fixer')

    current_page = ol.session.get(
        ol.base_url +
        '/recentchanges.json?author=/people/WorkBot&offset=%d&limit=%d' %
        (pos, limit))
    i = 0
    if current_page.status_code == 200:
        page = current_page.json()
        first_date = page[0]['timestamp'][:10]
        print('DATE', first_date)
        for p in page:
            if p.get('comment') == 'merge works':
Пример #27
0
        help=
        'Import to dev.openlibrary.org Open Library dev instance for testing',
        action='store_true')

    args = parser.parse_args()
    item = args.item
    fname = args.file
    local_testing = args.local
    dev_testing = args.dev
    barcode = args.barcode

    if local_testing:
        Credentials = namedtuple('Credentials', ['username', 'password'])
        local_dev = 'http://*****:*****@example.com', 'admin123')
        ol = OpenLibrary(base_url=local_dev, credentials=c)
    elif dev_testing:
        ol = OpenLibrary(base_url='https://dev.openlibrary.org')
    else:
        ol = OpenLibrary()

    print('Importing to %s' % ol.base_url)
    print('ITEM: %s' % item)
    print('FILENAME: %s' % fname)

    if args.info:
        if barcode is True:
            # display available local_ids
            print('Available local_ids to import:')
            r = ol.session.get(ol.base_url + '/local_ids.json')
            print(LOCAL_ID.findall(r.json()['body']['value']))
Пример #28
0
# Using the Open Library Client
from olclient.openlibrary import OpenLibrary
import olclient.common as common
import ndjson

# Import os to check for file exist and
# urllib to download the file
import os
import urllib.request

# Local path of the wish-list dump used throughout the script
FILE = 'data/wish_list_march_2018.ndjson'

# Open Library client shared by the rest of the script
ol = OpenLibrary()

# Make sure the directory that holds FILE exists. exist_ok=True avoids the
# check-then-create race of isdir() followed by mkdir(), and deriving the
# directory from FILE keeps the two paths from drifting apart.
os.makedirs(os.path.dirname(FILE), exist_ok=True)

# Download the dump only when it is not already present locally
if not os.path.exists(FILE):
    file_name = FILE
    urllib.request.urlretrieve(
        'https://archive.org/download/openlibrary-bots/wish_list_march_2018.ndjson',
        file_name)


def row2book(new_book):
    # Data of the book
class TestOpenLibrary(unittest.TestCase):

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        """Build the client under test while `OpenLibrary.login` is patched."""
        client = OpenLibrary()
        self.ol = client

    @patch('requests.Session.get')
    def test_get_olid_by_isbn(self, mock_get):
        """An ISBN lookup should yield the OLID found in the canned `info_url`.

        `requests.Session.get` is patched so the bibkeys response is fixed;
        the URL the client requested is verified as well.
        """
        isbn_key = 'ISBN:0374202915'
        isbn_bibkeys = {
            isbn_key: {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        mock_get.return_value.json.return_value = isbn_bibkeys

        olid = self.ol.Edition.get_olid_by_isbn(u'0374202915')

        mock_get.assert_called_with(
            "%s/api/books.json?bibkeys=%s" % (self.ol.base_url, isbn_key))
        expected_olid = u'OL23575801M'
        # assertEqual reports both operands itself on failure, whereas
        # assertTrue(a == b) only ever sees a boolean.
        self.assertEqual(olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, olid))

    @patch('requests.Session.get')
    def test_get_olid_notfound_by_bibkey(self, mock_get):
        """`Edition.get` should return None when the bibkey lookup is empty."""
        # Configure the patched response the way the other tests in this
        # class do. Assigning `mock_get.json_data` merely set an unused
        # attribute on the mock, so the client never received the intended
        # empty payload.
        mock_get.return_value.json.return_value = {}
        edition = self.ol.Edition.get(isbn='foobar')
        self.assertIsNone(edition)

    @patch('requests.Session.get')
    def test_get_work_by_metadata(self, mock_get):
        """A title search should request `/search.json` and use the canned doc."""
        title = u"The Autobiography of Benjamin Franklin"
        doc = {
            "key": u"/works/OL2514747W",
            "title": u"The Autobiography of Benjamin Franklin",
        }
        search_results = {'start': 0, 'num_found': 1, 'docs': [doc]}
        mock_get.return_value.json.return_value = search_results

        book = self.ol.Work.search(title=title)

        mock_get.assert_called_with(
            "%s/search.json?title=%s" % (self.ol.base_url, title))
        canonical_title = book.canonical_title
        # assertIn prints the member and the container on failure, which
        # assertTrue('x' in y) cannot do.
        self.assertIn('franklin', canonical_title,
                      "Expected 'franklin' to appear in result title: %s"
                      % canonical_title)

    @patch('requests.Session.get')
    def test_get_edition_by_isbn(self, mock_get):
        """`Edition.get(isbn=...)` should look up the ISBN, then fetch the edition.

        The patched session hands back the bibkeys lookup first and the
        edition record second; the exact call sequence on the mock is checked.
        """
        isbn_lookup_response = {
            u'ISBN:0374202915': {
                'info_url': u'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        edition_response = {'key': u"/books/OL23575801M", 'title': 'test'}
        # Successive json() calls return these payloads in order.
        mock_get.return_value.json.side_effect = [
            isbn_lookup_response, edition_response]

        book = self.ol.Edition.get(isbn=u'0374202915')

        mock_get.assert_has_calls([
            call("%s/api/books.json?bibkeys=ISBN:0374202915" % self.ol.base_url),
            call().raise_for_status(),
            call().json(),
            call("%s%s.json" % (self.ol.base_url, "/books/OL23575801M")),
            call().raise_for_status(),
            call().json()
        ])
        expected_olid = u'OL23575801M'
        # assertEqual (rather than assertTrue(a == b)) shows both values
        # in the failure output.
        self.assertEqual(book.olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, book.olid))

    @patch('requests.Session.get')
    def test_matching_authors_olid(self, mock_get):
        """`Author.get_olid_by_name` should return the OLID of the canned match."""
        author_autocomplete = [
            {'name': u"Benjamin Franklin", 'key': u"/authors/OL26170A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        name = u'Benjamin Franklin'

        got_olid = self.ol.Author.get_olid_by_name(name)

        expected_olid = u'OL26170A'
        # assertEqual reports both operands on failure; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(got_olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, got_olid))

    @patch('requests.Session.get')
    def test_create_book(self, mock_get):
        """`create_book(book, debug=True)` should return the expected field dict.

        The author autocomplete response is canned through the patched
        `requests.Session.get`, and the autocomplete URL is verified.
        """
        book = Book(publisher=u'Karamanolis', title=u'Alles ber Mikrofone',
                    identifiers={'isbn_10': [u'3922238246']}, publish_date=1982,
                    authors=[Author(name=u'Karl Schwarzer')],
                    publish_location=u'Neubiberg bei Mnchen')
        author_autocomplete = [
            {'name': u"Karl Schwarzer", 'key': u"/authors/OL7292805A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete

        got_result = self.ol.create_book(book, debug=True)

        mock_get.assert_called_with(
            "%s/authors/_autocomplete?q=%s&limit=1"
            % (self.ol.base_url, "Karl Schwarzer"))
        expected_result = {
            '_save': '',
            'author_key': u'/authors/OL7292805A',
            'author_name': u'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': u'3922238246',
            'publish_date': 1982,
            'publisher': u'Karamanolis',
            'title': u'Alles ber Mikrofone'
        }
        # assertEqual produces a key-by-key diff for dicts on failure, which
        # assertTrue(a == b) cannot provide.
        self.assertEqual(got_result, expected_result,
                         "Expected create_book to return %s, got %s"
                         % (expected_result, got_result))

    def test_get_work(self):
        """A Work built from keyword data should expose its title."""
        work_json = {u'title': u'All Quiet on the Western Front'}
        work = self.ol.Work(u'OL12938932W', **work_json)
        # assertEqual shows the actual title on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(work.title.lower(), 'all quiet on the western front',
                         "Failed to retrieve work")

    def test_work_json(self):
        """`Work.json()` should carry through the work key and author key."""
        author_role = {
            "type": "/type/author_role",
            "author": {"key": "/authors/OL5864762A"},
        }
        work = self.ol.Work(
            'OL12938932W', key='/works/OL12938932W', authors=[author_role])

        serialized = work.json()

        self.assertEqual(serialized['key'], "/works/OL12938932W")
        first_author = serialized['authors'][0]
        self.assertEqual(first_author['author']['key'], "/authors/OL5864762A")

    def test_work_validation(self):
        """`validate()` should return None for a work with full metadata."""
        modified = {
            'type': '/type/datetime',
            'value': '2016-10-12T00:48:04.453554'
        }
        work = self.ol.Work(
            'OL123W',
            title='Test Title',
            type={'key': '/type/work'},
            revision=1,
            last_modified=modified)

        outcome = work.validate()
        self.assertIsNone(outcome)

    def test_edition_json(self):
        """`Edition.json()` should emit key references and omit helper fields."""
        writer = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            authors=[writer])

        payload = edition.json()

        # The edition, its work and its authors are all expressed as keys.
        self.assertEqual(payload['key'], "/books/OL123M")
        self.assertEqual(payload['works'][0], {'key': '/works/OL123W'})
        self.assertEqual(payload['authors'][0], {'key': '/authors/OL123A'})

        # Fields that must not appear in the serialized form.
        self.assertNotIn('work_olid', payload)
        self.assertNotIn(
            'cover', payload,
            "'cover' is not a valid Edition property, should be list: 'covers'")

    def test_edition_validation(self):
        """A complete edition validates; one with no work raises ValidationError."""
        writer = self.ol.Author('OL123A', 'Test Author')
        modified = {
            'type': '/type/datetime',
            'value': '2016-10-12T00:48:04.453554'
        }
        complete = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            type={'key': '/type/edition'},
            revision=1,
            last_modified=modified,
            authors=[writer])
        self.assertIsNone(complete.validate())

        # Same identifiers, but with work_olid=None and no record metadata.
        orphan = self.ol.Edition(
            edition_olid='OL123M',
            work_olid=None,
            title='Test Title',
            authors=[writer])
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            orphan.validate()

    @patch('requests.Session.get')
    def test_get_notfound(self, mock_get):
        """HTTP errors raised by `raise_for_status` must propagate from `ol.get`.

        This tests that if requests.raise_for_status() raises an exception
        (e.g. 404 or 500 HTTP response) it is not swallowed by the client.
        One lookup is attempted per OLID suffix (edition, work, author).
        """
        mock_get.return_value.raise_for_status = raise_http_error
        suffixes = {'edition': 'M', 'work': 'W', 'author': 'A'}
        for _type, suffix in suffixes.items():
            target = "OLnotfound%s" % suffix
            # pytest.raises() no longer accepts a `message` keyword (removed
            # in pytest 5.0), so use unittest's assertRaises, whose
            # context-manager form takes `msg` for the failure text.
            with self.assertRaises(
                    requests.HTTPError,
                    msg="HTTPError not raised for %s: %s" % (_type, target)):
                self.ol.get(target)

    @patch('requests.Session.post')
    def test_save_many(self, mock_post):
        """`save_many` should POST both documents and put the comment in headers."""
        edition = self.ol.Edition(edition_olid='OL123M', work_olid='OL12W',
                                  title='minimal edition')
        work = self.ol.Work(olid='OL12W', title='minimal work')

        self.ol.save_many([edition, work], "test comment")

        mock_post.assert_called_with(
            "%s/api/save_many" % self.ol.base_url, ANY, headers=ANY)
        # call_args is (positional args, keyword args); the JSON body is the
        # second positional argument of the POST.
        called_with_json = json.loads(mock_post.call_args[0][1])
        called_with_headers = mock_post.call_args[1]['headers']
        # A unittest assertion (instead of a bare `assert`) still runs under
        # `python -O` and reports the actual length when it fails.
        self.assertEqual(len(called_with_json), 2)
        self.assertIn('ns=42', called_with_headers['Opt'])
        self.assertEqual('test comment', called_with_headers['42-comment'])

    def test_delete(self):
        """A Delete keeps its olid and serializes with the delete type and key."""
        deletion = self.ol.Delete('OL1W')
        self.assertEqual(deletion.olid, 'OL1W')

        type_key = deletion.json()['type']['key']
        self.assertEqual('/type/delete', type_key)

        record_key = deletion.json()['key']
        self.assertEqual('/works/OL1W', record_key)

    def test_redirect(self):
        """A Redirect serializes with the redirect type and a 'location' entry."""
        redirect = self.ol.Redirect(f='OL1W', t='OL2W')

        type_key = redirect.json()['type']['key']
        self.assertEqual('/type/redirect', type_key)

        serialized = redirect.json()
        self.assertIn('location', serialized)