Пример #1
0
     print('DATE', first_date)
     for p in page:
         if p.get('comment') == 'merge works':
             #print('FOUND %s' % p)
             for c in p.get('changes') or []:
                 rev = c.get('revision', 2)
                 eprev = ol.session.get(ol.base_url + c.get('key') +
                                        '.json?v=%d' % (rev - 1))
                 if eprev.status_code == 200:
                     if eprev.json().get('works') is None:
                         # It's probably a work
                         #print('ORPHAN FOUND!', c.get('key'))
                         continue
                     wprev = eprev.json().get('works')[0].get(
                         'key').replace('/works/', '')
                     cedition = ol.get(c.get('key').replace('/books/', ''))
                     if cedition.work_olid is None:
                         print('ERROR: problem getting work for %s' % c)
                         continue
                     wnext = cedition.work
                     w = ol.get(wprev)
                     #print(w.olid, w.editions)
                     if is_work(w) and not w.editions:
                         print('Work without editions found! %s' % w.olid)
                         print('    redirect to current work:', wnext.olid)
                         assert w.olid != wnext.olid
                         assert is_work(wnext)
                         redirect = ol.Redirect(f=w.olid, t=wnext.olid)
                         print(redirect.save('redirect to duplicate work'))
                         i += 1
 print(i, 'changes made.')
class TestOpenLibrary(unittest.TestCase):
    """Unit tests for the OpenLibrary client.

    Tests that exercise HTTP code paths patch ``requests.Session.get`` or
    ``requests.Session.post`` and feed canned JSON through the mock; the
    remaining tests only build client objects and inspect their JSON.
    """

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        """Build the client under test with ``OpenLibrary.login`` patched."""
        self.ol = OpenLibrary()

    @patch('requests.Session.get')
    def test_get_olid_by_isbn(self, mock_get):
        """An ISBN lookup yields the OLID taken from the bibkeys response."""
        isbn_key = 'ISBN:0374202915'
        isbn_bibkeys = {
            isbn_key: {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        mock_get.return_value.json.return_value = isbn_bibkeys
        olid = self.ol.Edition.get_olid_by_isbn('0374202915')
        mock_get.assert_called_with(
            f"{self.ol.base_url}/api/books.json?bibkeys={isbn_key}"
        )
        expected_olid = 'OL23575801M'
        self.assertEqual(
            olid, expected_olid, f"Expected olid {expected_olid}, got {olid}"
        )

    @patch('requests.Session.get')
    def test_get_olid_notfound_by_bibkey(self, mock_get):
        """An ISBN missing from the bibkeys response yields ``None``."""
        # Configure the mocked response body explicitly: an empty mapping is
        # what the test means by "not found".
        mock_get.return_value.json.return_value = {}
        edition = self.ol.Edition.get(isbn='foobar')
        self.assertIsNone(edition)

    @patch('requests.Session.get')
    def test_get_work_by_metadata(self, mock_get):
        """A title search requests search.json and returns the matching doc."""
        doc = {
            "key": "/works/OL2514747W",
            "title": "The Autobiography of Benjamin Franklin",
        }
        search_results = {'start': 0, 'num_found': 1, 'docs': [doc]}
        title = "The Autobiography of Benjamin Franklin"
        mock_get.return_value.json.return_value = search_results
        book = self.ol.Work.search(title=title)
        mock_get.assert_called_with(f"{self.ol.base_url}/search.json?title={title}")
        canonical_title = book.canonical_title
        self.assertIn(
            'franklin',
            canonical_title,
            "Expected 'franklin' to appear in result title: %s" % canonical_title,
        )

    @patch('requests.Session.get')
    def test_get_edition_by_isbn(self, mock_get):
        """Fetching by ISBN does a bibkeys lookup, then loads the edition."""
        isbn_lookup_response = {
            'ISBN:0374202915': {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        edition_response = {'key': "/books/OL23575801M", 'title': 'test'}
        # side_effect hands out one canned body per successive .json() call.
        mock_get.return_value.json.side_effect = [
            isbn_lookup_response,
            edition_response,
        ]
        book = self.ol.Edition.get(isbn='0374202915')
        mock_get.assert_has_calls(
            [
                call("%s/api/books.json?bibkeys=ISBN:0374202915" % self.ol.base_url),
                call().raise_for_status(),
                call().json(),
                call("{}/books/OL23575801M.json".format(self.ol.base_url)),
                call().raise_for_status(),
                call().json(),
            ]
        )
        expected_olid = 'OL23575801M'
        self.assertEqual(
            book.olid,
            expected_olid,
            f"Expected olid {expected_olid}, got {book.olid}",
        )

    @patch('requests.Session.get')
    def test_matching_authors_olid(self, mock_get):
        """An author name resolves to the OLID from the autocomplete result."""
        author_autocomplete = [
            {'name': "Benjamin Franklin", 'key': "/authors/OL26170A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        name = 'Benjamin Franklin'
        got_olid = self.ol.Author.get_olid_by_name(name)
        expected_olid = 'OL26170A'
        self.assertEqual(
            got_olid, expected_olid, f"Expected olid {expected_olid}, got {got_olid}"
        )

    @patch('requests.Session.get')
    def test_create_book(self, mock_get):
        """``create_book(debug=True)`` returns the form data it would submit."""
        book = Book(
            publisher='Karamanolis',
            title='Alles ber Mikrofone',
            identifiers={'isbn_10': ['3922238246']},
            publish_date=1982,
            authors=[Author(name='Karl Schwarzer')],
            publish_location='Neubiberg bei Mnchen',
        )
        author_autocomplete = [{'name': "Karl Schwarzer", 'key': "/authors/OL7292805A"}]
        mock_get.return_value.json.return_value = author_autocomplete
        got_result = self.ol.create_book(book, debug=True)
        mock_get.assert_called_with(
            "{}/authors/_autocomplete?q={}&limit=1".format(
                self.ol.base_url, "Karl Schwarzer"
            )
        )
        expected_result = {
            '_save': '',
            'author_key': '/authors/OL7292805A',
            'author_name': 'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': '3922238246',
            'publish_date': 1982,
            'publisher': 'Karamanolis',
            'title': 'Alles ber Mikrofone',
        }
        self.assertEqual(
            got_result,
            expected_result,
            "Expected create_book to return %s, got %s" % (expected_result, got_result),
        )

    def test_get_work(self):
        """Keyword arguments given to ``Work`` are exposed as attributes."""
        work_json = {'title': 'All Quiet on the Western Front'}
        work = self.ol.Work('OL12938932W', **work_json)
        self.assertEqual(
            work.title.lower(),
            'all quiet on the western front',
            "Failed to retrieve work",
        )

    def test_work_json(self):
        """``Work.json()`` keeps the key and the author role entries."""
        authors = [
            {"type": "/type/author_role", "author": {"key": "/authors/OL5864762A"}}
        ]
        work = self.ol.Work('OL12938932W', key='/works/OL12938932W', authors=authors)
        work_json = work.json()
        self.assertEqual(work_json['key'], "/works/OL12938932W")
        self.assertEqual(
            work_json['authors'][0]['author']['key'], "/authors/OL5864762A"
        )

    def test_work_validation(self):
        """A work carrying type, revision and last_modified validates."""
        work = self.ol.Work(
            'OL123W',
            title='Test Title',
            type={'key': '/type/work'},
            revision=1,
            last_modified={
                'type': '/type/datetime',
                'value': '2016-10-12T00:48:04.453554',
            },
        )
        self.assertIsNone(work.validate())

    def test_edition_json(self):
        """``Edition.json()`` emits key/works/authors and no client-only fields."""
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            authors=[author],
        )
        edition_json = edition.json()
        self.assertEqual(edition_json['key'], "/books/OL123M")
        self.assertEqual(edition_json['works'][0], {'key': '/works/OL123W'})
        self.assertEqual(edition_json['authors'][0], {'key': '/authors/OL123A'})

        self.assertNotIn('work_olid', edition_json)
        self.assertNotIn(
            'cover',
            edition_json,
            "'cover' is not a valid Edition property, should be list: 'covers'",
        )

    def test_edition_validation(self):
        """A complete edition validates; one without a work is rejected."""
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(
            edition_olid='OL123M',
            work_olid='OL123W',
            title='Test Title',
            type={'key': '/type/edition'},
            revision=1,
            last_modified={
                'type': '/type/datetime',
                'value': '2016-10-12T00:48:04.453554',
            },
            authors=[author],
        )
        self.assertIsNone(edition.validate())
        orphaned_edition = self.ol.Edition(
            edition_olid='OL123M', work_olid=None, title='Test Title', authors=[author]
        )
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            orphaned_edition.validate()

    @patch('requests.Session.get')
    def test_get_notfound(self, mock_get):
        """HTTP errors from ``raise_for_status`` propagate out of ``ol.get``."""
        # This tests that if requests.raise_for_status() raises an exception,
        # (e.g. 404 or 500 HTTP response) it is not swallowed by the client.
        mock_get.return_value.raise_for_status = raise_http_error
        suffixes = {'edition': 'M', 'work': 'W', 'author': 'A'}
        for _type, suffix in suffixes.items():
            target = "OLnotfound%s" % suffix
            with pytest.raises(requests.HTTPError):
                self.ol.get(target)
                # Only reached when no exception propagated; the outcome
                # raised by pytest.fail is not an HTTPError, so it escapes
                # pytest.raises and names the offending type.
                pytest.fail(f"HTTPError not raised for {_type}: {target}")

    @patch('requests.Session.post')
    def test_save_many(self, mock_post):
        """``save_many`` posts every document in one request with its comment."""
        edition = self.ol.Edition(
            edition_olid='OL123M', work_olid='OL12W', title='minimal edition'
        )
        work = self.ol.Work(olid='OL12W', title='minimal work')
        self.ol.save_many([edition, work], "test comment")
        mock_post.assert_called_with(
            "%s/api/save_many" % self.ol.base_url, ANY, headers=ANY
        )
        # call_args is (positional args, keyword args); the payload is the
        # second positional argument, the headers are passed by keyword.
        called_with_json = json.loads(mock_post.call_args[0][1])
        called_with_headers = mock_post.call_args[1]['headers']
        self.assertEqual(len(called_with_json), 2)
        self.assertIn('ns=42', called_with_headers['Opt'])
        self.assertEqual('test comment', called_with_headers['42-comment'])

    def test_delete(self):
        """``Delete`` serialises to a /type/delete document for the work key."""
        delete = self.ol.Delete('OL1W')
        self.assertEqual(delete.olid, 'OL1W')
        delete_json = delete.json()
        self.assertEqual('/type/delete', delete_json['type']['key'])
        self.assertEqual('/works/OL1W', delete_json['key'])

    def test_redirect(self):
        """``Redirect`` serialises to a /type/redirect document with a location."""
        redirect = self.ol.Redirect(f='OL1W', t='OL2W')
        redirect_json = redirect.json()
        self.assertEqual('/type/redirect', redirect_json['type']['key'])
        self.assertIn('location', redirect_json)
Пример #3
0
# start and end are False or line numbers in infile to begin and stop processing
# Used in case there is a need to resume or re-run part of a batch.
# NOTE: they are compared against enumerate()'s index, so they are 0-based.
start = False
end = False
with open(infile) as f:
    for count, line in enumerate(f):
        # Apply the resume window before decoding the line, so records
        # outside it are never parsed and cannot abort a resumed run.
        if start and count < start:
            continue
        if end and count > end:
            break
        # Each line is one JSON object.
        # OLD TSV FORMAT: ocaid, olid = line.split()
        data = json.loads(line)
        ocaid = data.get('identifier')
        olid = data.get('openlibrary')
        # check and add ocaid to OL edition
        print("Adding %s to %s" % (ocaid, olid))
        edition = ol.get(olid)
        assert edition.title, "Missing title in %s!" % olid

        if hasattr(edition, 'ocaid'):
            # An existing ocaid is reported and left untouched at this point.
            print("  OCAID already found: %s" % edition.ocaid)
        else:
            edition.ocaid = ocaid
            edition.save('add ocaid')
        # sync the edition
        r = sync_ol_to_ia(olid)
        # NOTE(review): presumably sync_ol_to_ia returns an HTTP status code;
        # on 500 the ocaid is (re)written and the sync is retried once.
        if r == 500:
            edition.ocaid = ocaid
            edition.save('update ocaid')
            sync_ol_to_ia(olid)
Пример #4
0
import requests
from internetarchive import modify_metadata
from olclient.openlibrary import OpenLibrary

fname = sys.argv[1]

ol = OpenLibrary()

n = 0
with open(fname, 'r') as f:
   for line in f.readlines():
       data = json.loads(line)
       olid = data['openlibrary']
       ocaid = data['identifier']
       try: 
           e = ol.get(olid)
           wolid = e.work.olid
           assert wolid
       except requests.exceptions.HTTPError as e:
           print('404', olid, ocaid)
           wolid = None
       to_write = {
           'openlibrary_edition': olid
       }
       if wolid:
           to_write['openlibrary_work'] = wolid
       #print(ocaid, to_write)
       r = modify_metadata(ocaid, metadata=to_write)
       print('%s: %s' % (ocaid, r.status_code))
       n += 1
       if n > 300:
Пример #5
0
# Misspelled subject tag handled together with the entries in `fakes`.
otherbad = ['fictiion']

# only remove these from works:
wfakes = [
    'large type books',
    'popular print disabled books',
]

# Subject tags targeted on every record type; the `otherbad` entries are
# appended so a single list covers both groups.
fakes = [
    'overdrive',
    'in library',
    'accessible book',
    'protected daisy',
    'lending library',
    'internet archive wishlist',
]
fakes.extend(otherbad)

# Running count of modifications, starting at zero.
changes_made = 0
with open(inlist, 'r') as f:
    for item in f:
        olid = item.strip().replace('/books/', '').replace('/works/', '')
        book = ol.get(olid)
        if not book.type.get('key') in ('/type/edition', '/type/work'):
            print("Unexpected type for %s -- Skipping!" % olid)
        else:
            orig_subjects = []
            if hasattr(book, 'subjects'):
                orig_subjects = copy(book.subjects)
            else:
                continue
            #print(olid)
            #print(u"%s: %s -- %s" % (olid, book.title, orig_subjects))
            targets = copy(fakes)
            if book.type['key'] == '/type/work':
                targets += wfakes
            removals = []
            for s in book.subjects:
class TestOpenLibrary(unittest.TestCase):
    """Unit tests for the OpenLibrary client.

    Tests that exercise HTTP code paths patch ``requests.Session.get`` or
    ``requests.Session.post`` and feed canned JSON through the mock; the
    remaining tests only build client objects and inspect their JSON.
    """

    @patch('olclient.openlibrary.OpenLibrary.login')
    def setUp(self, mock_login):
        """Build the client under test with ``OpenLibrary.login`` patched."""
        self.ol = OpenLibrary()

    @patch('requests.Session.get')
    def test_get_olid_by_isbn(self, mock_get):
        """An ISBN lookup yields the OLID taken from the bibkeys response."""
        isbn_key = 'ISBN:0374202915'
        isbn_bibkeys = {
            isbn_key: {
                'info_url': 'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        mock_get.return_value.json.return_value = isbn_bibkeys
        olid = self.ol.Edition.get_olid_by_isbn(u'0374202915')
        mock_get.assert_called_with(
            "%s/api/books.json?bibkeys=%s" % (self.ol.base_url, isbn_key))
        expected_olid = u'OL23575801M'
        self.assertEqual(olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, olid))

    @patch('requests.Session.get')
    def test_get_olid_notfound_by_bibkey(self, mock_get):
        """An ISBN missing from the bibkeys response yields ``None``."""
        # Configure the mocked response body explicitly: an empty mapping is
        # what the test means by "not found".
        mock_get.return_value.json.return_value = {}
        edition = self.ol.Edition.get(isbn='foobar')
        self.assertIsNone(edition)

    @patch('requests.Session.get')
    def test_get_work_by_metadata(self, mock_get):
        """A title search requests search.json and returns the matching doc."""
        doc = {
            "key": u"/works/OL2514747W",
            "title": u"The Autobiography of Benjamin Franklin",
        }
        search_results = {'start': 0, 'num_found': 1, 'docs': [doc]}
        title = u"The Autobiography of Benjamin Franklin"
        mock_get.return_value.json.return_value = search_results
        book = self.ol.Work.search(title=title)
        mock_get.assert_called_with(
            "%s/search.json?title=%s" % (self.ol.base_url, title))
        canonical_title = book.canonical_title
        self.assertIn('franklin', canonical_title,
                      "Expected 'franklin' to appear in result title: %s" %
                      canonical_title)

    @patch('requests.Session.get')
    def test_get_edition_by_isbn(self, mock_get):
        """Fetching by ISBN does a bibkeys lookup, then loads the edition."""
        isbn_lookup_response = {
            u'ISBN:0374202915': {
                'info_url': u'https://openlibrary.org/books/OL23575801M/Marie_LaVeau'
            }
        }
        edition_response = {'key': u"/books/OL23575801M", 'title': 'test'}
        # side_effect hands out one canned body per successive .json() call.
        mock_get.return_value.json.side_effect = [
            isbn_lookup_response,
            edition_response,
        ]
        book = self.ol.Edition.get(isbn=u'0374202915')
        mock_get.assert_has_calls([
            call("%s/api/books.json?bibkeys=ISBN:0374202915" % self.ol.base_url),
            call().raise_for_status(),
            call().json(),
            call("%s%s.json" % (self.ol.base_url, "/books/OL23575801M")),
            call().raise_for_status(),
            call().json(),
        ])
        expected_olid = u'OL23575801M'
        self.assertEqual(book.olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, book.olid))

    @patch('requests.Session.get')
    def test_matching_authors_olid(self, mock_get):
        """An author name resolves to the OLID from the autocomplete result."""
        author_autocomplete = [
            {'name': u"Benjamin Franklin", 'key': u"/authors/OL26170A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        name = u'Benjamin Franklin'
        got_olid = self.ol.Author.get_olid_by_name(name)
        expected_olid = u'OL26170A'
        self.assertEqual(got_olid, expected_olid,
                         "Expected olid %s, got %s" % (expected_olid, got_olid))

    @patch('requests.Session.get')
    def test_create_book(self, mock_get):
        """``create_book(debug=True)`` returns the form data it would submit."""
        book = Book(publisher=u'Karamanolis', title=u'Alles ber Mikrofone',
                    identifiers={'isbn_10': [u'3922238246']}, publish_date=1982,
                    authors=[Author(name=u'Karl Schwarzer')],
                    publish_location=u'Neubiberg bei Mnchen')
        author_autocomplete = [
            {'name': u"Karl Schwarzer", 'key': u"/authors/OL7292805A"}
        ]
        mock_get.return_value.json.return_value = author_autocomplete
        got_result = self.ol.create_book(book, debug=True)
        mock_get.assert_called_with(
            "%s/authors/_autocomplete?q=%s&limit=1" % (self.ol.base_url, "Karl Schwarzer"))
        expected_result = {
            '_save': '',
            'author_key': u'/authors/OL7292805A',
            'author_name': u'Karl Schwarzer',
            'id_name': 'isbn_10',
            'id_value': u'3922238246',
            'publish_date': 1982,
            'publisher': u'Karamanolis',
            'title': u'Alles ber Mikrofone'
        }
        self.assertEqual(got_result, expected_result,
                         "Expected create_book to return %s, got %s"
                         % (expected_result, got_result))

    def test_get_work(self):
        """Keyword arguments given to ``Work`` are exposed as attributes."""
        work_json = {u'title': u'All Quiet on the Western Front'}
        work = self.ol.Work(u'OL12938932W', **work_json)
        self.assertEqual(work.title.lower(), 'all quiet on the western front',
                         "Failed to retrieve work")

    def test_work_json(self):
        """``Work.json()`` keeps the key and the author role entries."""
        authors = [{
            "type": "/type/author_role",
            "author": {"key": "/authors/OL5864762A"},
        }]
        work = self.ol.Work('OL12938932W',
                            key='/works/OL12938932W',
                            authors=authors)
        work_json = work.json()
        self.assertEqual(work_json['key'], "/works/OL12938932W")
        self.assertEqual(work_json['authors'][0]['author']['key'],
                         "/authors/OL5864762A")

    def test_work_validation(self):
        """A work carrying type, revision and last_modified validates."""
        work = self.ol.Work('OL123W',
                            title='Test Title',
                            type={'key': '/type/work'},
                            revision=1,
                            last_modified={
                                'type': '/type/datetime',
                                'value': '2016-10-12T00:48:04.453554'
                            })
        self.assertIsNone(work.validate())

    def test_edition_json(self):
        """``Edition.json()`` emits key/works/authors and no client-only fields."""
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(edition_olid='OL123M',
                                  work_olid='OL123W',
                                  title='Test Title',
                                  authors=[author])
        edition_json = edition.json()
        self.assertEqual(edition_json['key'], "/books/OL123M")
        self.assertEqual(edition_json['works'][0], {'key': '/works/OL123W'})
        self.assertEqual(edition_json['authors'][0], {'key': '/authors/OL123A'})

        self.assertNotIn('work_olid', edition_json)
        self.assertNotIn('cover', edition_json,
                         "'cover' is not a valid Edition property, should be list: 'covers'")

    def test_edition_validation(self):
        """A complete edition validates; one without a work is rejected."""
        author = self.ol.Author('OL123A', 'Test Author')
        edition = self.ol.Edition(edition_olid='OL123M',
                                  work_olid='OL123W',
                                  title='Test Title',
                                  type={'key': '/type/edition'},
                                  revision=1,
                                  last_modified={
                                      'type': '/type/datetime',
                                      'value': '2016-10-12T00:48:04.453554'
                                  },
                                  authors=[author])
        self.assertIsNone(edition.validate())
        orphaned_edition = self.ol.Edition(edition_olid='OL123M',
                                           work_olid=None,
                                           title='Test Title',
                                           authors=[author])
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            orphaned_edition.validate()

    @patch('requests.Session.get')
    def test_get_notfound(self, mock_get):
        """HTTP errors from ``raise_for_status`` propagate out of ``ol.get``."""
        # This tests that if requests.raise_for_status() raises an exception,
        # (e.g. 404 or 500 HTTP response) it is not swallowed by the client.
        mock_get.return_value.raise_for_status = raise_http_error
        suffixes = {'edition': 'M', 'work': 'W', 'author': 'A'}
        for _type, suffix in suffixes.items():
            target = "OLnotfound%s" % suffix
            # pytest.raises() no longer accepts ``message=``; failing from
            # inside the context keeps the custom message on every version.
            with pytest.raises(requests.HTTPError):
                self.ol.get(target)
                # Only reached when no exception propagated; the outcome
                # raised by pytest.fail is not an HTTPError, so it escapes
                # pytest.raises and names the offending type.
                pytest.fail("HTTPError not raised for %s: %s" % (_type, target))

    @patch('requests.Session.post')
    def test_save_many(self, mock_post):
        """``save_many`` posts every document in one request with its comment."""
        edition = self.ol.Edition(edition_olid='OL123M', work_olid='OL12W',
                                  title='minimal edition')
        work = self.ol.Work(olid='OL12W', title='minimal work')
        self.ol.save_many([edition, work], "test comment")
        mock_post.assert_called_with("%s/api/save_many" % self.ol.base_url,
                                     ANY, headers=ANY)
        # call_args is (positional args, keyword args); the payload is the
        # second positional argument, the headers are passed by keyword.
        called_with_json = json.loads(mock_post.call_args[0][1])
        called_with_headers = mock_post.call_args[1]['headers']
        self.assertEqual(len(called_with_json), 2)
        self.assertIn('ns=42', called_with_headers['Opt'])
        self.assertEqual('test comment', called_with_headers['42-comment'])

    def test_delete(self):
        """``Delete`` serialises to a /type/delete document for the work key."""
        delete = self.ol.Delete('OL1W')
        self.assertEqual(delete.olid, 'OL1W')
        delete_json = delete.json()
        self.assertEqual('/type/delete', delete_json['type']['key'])
        self.assertEqual('/works/OL1W', delete_json['key'])

    def test_redirect(self):
        """``Redirect`` serialises to a /type/redirect document with a location."""
        redirect = self.ol.Redirect(f='OL1W', t='OL2W')
        redirect_json = redirect.json()
        self.assertEqual('/type/redirect', redirect_json['type']['key'])
        self.assertIn('location', redirect_json)