Пример #1
0
    def test_extract_multiple_works_with_author_restriction(self):
        """Parsing can be restricted to works credited to given authors."""
        xml = self.sample_data("multi_work_response.xml")

        def swids_credited_to(sort_name):
            # The list-unpacking insists that the lookup yields exactly
            # one contributor for this sort name.
            [author], ignore = Contributor.lookup(
                self._db, sort_name=sort_name)
            status, found = OCLCXMLParser.parse(
                self._db, xml, languages=["eng"], authors=[author])
            return found

        # Nobody by this name is listed as an author of any work in the
        # dataset, so the restriction filters out every work.
        eq_(0, len(swids_credited_to("Wrong Author")))

        # Restricting to Herman Melville keeps 11 of the 25 works in
        # the dataset.
        melville_swids = swids_credited_to("Melville, Herman")
        eq_(11, len(melville_swids))

        # The works that were left out (as you can verify by looking at
        # oclc_multi_work_response.xml) fall into three groups: works
        # that don't credit Herman Melville at all (the 1956 Gregory
        # Peck movie "Moby Dick"), works that credit him as "Associated
        # name" rather than as an author (four books about "Moby
        # Dick"), and works that credit him as an author but not as the
        # primary author (academic works and adaptations).
        not_by_melville = (
            "10798812",
            "13424036",
            "22658644",
            "250604212",
            "474972877",
            "13358012",
            "153927888",
            "13206523",
            "46935692",
            "14135019",
            "51088077",
            "105446800",
            "164732682",
            "26863225",
        )
        for swid in not_by_melville:
            assert swid not in melville_swids
Пример #2
0
    def test_to_edition_sets_sort_author_name_if_obvious(self):
        """to_edition() yields author fields that agree with a contributor.

        NOTE(review): presumably self.one_list_title credits Paula
        Hawkins -- confirm against the fixture.
        """
        # Look up the contributor filed under this sort name (the
        # unpacking requires exactly one result) and give it a
        # display name.
        contributors, ignore = Contributor.lookup(self._db, u"Hawkins, Paula")
        [hawkins] = contributors
        hawkins.display_name = u"Paula Hawkins"

        list_title = NYTBestSellerListTitle(self.one_list_title)
        edition = list_title.to_edition(self._db, self.metadata_client)

        # The edition carries both forms of the contributor's name...
        eq_(hawkins.sort_name, edition.sort_author)
        eq_(hawkins.display_name, edition.author)
        # ...and has been given a permanent work ID.
        assert edition.permanent_work_id is not None
Пример #3
0
    def test_to_edition_sets_sort_author_name_if_obvious(self):
        """A book-medium list title becomes an edition with both author names.

        NOTE(review): presumably self.one_list_title credits Paula
        Hawkins -- confirm against the fixture.
        """
        # Exactly one contributor is expected back for this sort name.
        found, ignore = Contributor.lookup(self._db, u"Hawkins, Paula")
        [author] = found
        author.display_name = u"Paula Hawkins"

        edition = NYTBestSellerListTitle(
            self.one_list_title, Edition.BOOK_MEDIUM
        ).to_edition(self._db, self.metadata_client)

        # Sort name and display name both match the contributor, and a
        # permanent work ID has been assigned.
        eq_(author.sort_name, edition.sort_author)
        eq_(author.display_name, edition.author)
        assert edition.permanent_work_id is not None
    def test_extract_multiple_works_with_author_restriction(self):
        """Only works by an accepted author survive an author restriction."""
        xml = self.sample_data("multi_work_response.xml")

        # First, restrict to someone who is not listed as an author of
        # any work in the dataset: nothing should be picked up.
        [stranger], ignore = Contributor.lookup(
            self._db, sort_name="Wrong Author"
        )
        status, stranger_swids = OCLCXMLParser.parse(
            self._db, xml, languages=["eng"], authors=[stranger]
        )
        eq_(0, len(stranger_swids))

        # Now restrict to Herman Melville: 11 of the 25 works in the
        # dataset are picked up.
        [melville], ignore = Contributor.lookup(
            self._db, sort_name="Melville, Herman"
        )
        status, melville_swids = OCLCXMLParser.parse(
            self._db, xml, languages=["eng"], authors=[melville]
        )
        eq_(11, len(melville_swids))

        # The works below were skipped. As you can verify by looking at
        # oclc_multi_work_response.xml, each of them either doesn't
        # credit Herman Melville at all (the 1956 Gregory Peck movie
        # "Moby Dick"), credits him as "Associated name" rather than as
        # an author (four books about "Moby Dick"), or credits him as
        # an author but not as the primary author (academic works and
        # adaptations).
        skipped = [
            "10798812", "13424036", "22658644", "250604212", "474972877",
            "13358012", "153927888", "13206523", "46935692", "14135019",
            "51088077", "105446800", "164732682", "26863225",
        ]
        for swid in skipped:
            assert swid not in melville_swids
Пример #5
0
    def _parse_single_author(cls, _db, author,
                             lc=None, viaf=None,
                             existing_authors=None,
                             default_role=Contributor.AUTHOR_ROLE,
                             primary_author=None):
        """Turn one author string into a contributor and a list of roles.

        The string is expected to look something like
        "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]".
        The bracketed role list and the lifespan are both optional.

        :param _db: Database handle, passed through to Contributor.lookup.
        :param author: The raw author string.
        :param lc: LC number for the author, if known. Used when matching
            against `existing_authors` and when calling Contributor.lookup.
        :param viaf: VIAF number for the author, if known. Used the same
            way as `lc`.
        :param existing_authors: Optional collection of contributors to
            check (via `_contributor_match`) before falling back to
            Contributor.lookup. None or empty means there is nothing
            to check.
        :param default_role: Role to assign when the string names no
            roles. A falsy value means no role is assigned in that case.
        :param primary_author: Optional contributor. If the parsed name
            equals its `sort_name`, PRIMARY_AUTHOR_ROLE is placed first
            in the role list and AUTHOR_ROLE / UNKNOWN_ROLE are removed.
        :return: A 3-tuple (contributor, roles, default_role_used).
            `contributor` is None when the string contains no name.
        """
        default_role_used = False
        # First find roles if present
        # "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
        author = author.strip()
        m = cls.ROLES.search(author)
        if m:
            author = author[:m.start()].strip()
            role_string = m.groups()[0]
            roles = [x.strip() for x in role_string.split(";")]
        elif default_role:
            roles = [default_role]
            default_role_used = True
        else:
            roles = []

        # Author string now looks like
        # "Giles, Lionel, 1875-1958"
        m = cls.LIFESPAN.search(author)
        # Birth/death dates, handed to Contributor.lookup as `extra`.
        kwargs = dict()
        if m:
            author = author[:m.start()].strip()
            birth, death = m.groups()
            if birth:
                kwargs[Contributor.BIRTH_DATE] = birth
            if death:
                kwargs[Contributor.DEATH_DATE] = death

        # Author string now looks like
        # "Giles, Lionel,"
        if author.endswith(","):
            author = author[:-1]

        if not author:
            # No name was given for the author.
            return None, roles, default_role_used

        if primary_author and author == primary_author.sort_name:
            # This is the primary author: swap any generic author/unknown
            # role for the primary-author role, listed first.
            if Contributor.AUTHOR_ROLE in roles:
                roles.remove(Contributor.AUTHOR_ROLE)
            if Contributor.UNKNOWN_ROLE in roles:
                roles.remove(Contributor.UNKNOWN_ROLE)
            roles.insert(0, Contributor.PRIMARY_AUTHOR_ROLE)

        contributor = None
        if existing_authors:
            # Calling Contributor.lookup will result in a database
            # hit, and looking up a contributor based on name may
            # result in multiple results (see below). We'll have no
            # way of distinguishing between those results. If
            # possible, it's much more reliable to look through
            # existing_authors (the authors derived from an entry's
            # <authors> tag).
            for candidate in existing_authors:
                if cls._contributor_match(candidate, author, lc, viaf):
                    contributor = candidate
                    break

        if not contributor:
            # Whether the contributor was newly created is of no
            # interest here.
            contributor, ignore = Contributor.lookup(
                _db, author, viaf, lc, extra=kwargs)
        if isinstance(contributor, list):
            # We asked for an author based solely on the name, which makes
            # Contributor.lookup() return a list.
            if len(contributor) == 1:
                # Fortunately, either the database knows about only
                # one author with that name, or it didn't know about
                # any authors with that name and it just created one,
                # so we can unambiguously use it.
                contributor = contributor[0]
            else:
                # Uh-oh. The database knows about multiple authors
                # with that name.  We have no basis for deciding which
                # author we mean. But we would prefer to identify with
                # an author who has a known LC or VIAF number.
                #
                # This should happen very rarely because of our check
                # against existing_authors above. But it will happen
                # for authors that have a work in Project Gutenberg.
                with_id = [x for x in contributor if x.lc is not None
                           or x.viaf is not None]
                contributor = with_id[0] if with_id else contributor[0]
        return contributor, roles, default_role_used
Пример #6
0
    def _parse_single_author(cls,
                             _db,
                             author,
                             lc=None,
                             viaf=None,
                             existing_authors=None,
                             default_role=Contributor.AUTHOR_ROLE,
                             primary_author=None):
        """Parse a single author string into a contributor plus roles.

        A fully-populated string looks like
        "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]".
        Both the lifespan and the bracketed role list may be absent.

        :param _db: Database handle, forwarded to Contributor.lookup.
        :param author: The author string to parse.
        :param lc: The author's LC number, if known. Forwarded to
            `_contributor_match` and Contributor.lookup.
        :param viaf: The author's VIAF number, if known. Forwarded to
            `_contributor_match` and Contributor.lookup.
        :param existing_authors: Optional collection of contributors
            that is searched (with `_contributor_match`) before
            Contributor.lookup is consulted. None or empty skips
            that search.
        :param default_role: The role used when the string does not
            list any roles. If falsy, the role list is left empty in
            that case.
        :param primary_author: Optional contributor. When the parsed
            name is equal to its `sort_name`, AUTHOR_ROLE and
            UNKNOWN_ROLE are dropped from the role list and
            PRIMARY_AUTHOR_ROLE is inserted at the front.
        :return: A 3-tuple (contributor, roles, default_role_used).
            `contributor` is None if the string held no name.
        """
        default_role_used = False
        # First find roles if present
        # "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
        author = author.strip()
        m = cls.ROLES.search(author)
        if m:
            author = author[:m.start()].strip()
            role_string = m.groups()[0]
            roles = [x.strip() for x in role_string.split(";")]
        elif default_role:
            roles = [default_role]
            default_role_used = True
        else:
            roles = []

        # Author string now looks like
        # "Giles, Lionel, 1875-1958"
        m = cls.LIFESPAN.search(author)
        # Collects birth/death dates; passed to Contributor.lookup
        # as `extra`.
        kwargs = dict()
        if m:
            author = author[:m.start()].strip()
            birth, death = m.groups()
            if birth:
                kwargs[Contributor.BIRTH_DATE] = birth
            if death:
                kwargs[Contributor.DEATH_DATE] = death

        # Author string now looks like
        # "Giles, Lionel,"
        if author.endswith(","):
            author = author[:-1]

        if not author:
            # No name was given for the author.
            return None, roles, default_role_used

        if primary_author and author == primary_author.sort_name:
            # The name belongs to the primary author, so the generic
            # author/unknown roles give way to the primary-author role.
            if Contributor.AUTHOR_ROLE in roles:
                roles.remove(Contributor.AUTHOR_ROLE)
            if Contributor.UNKNOWN_ROLE in roles:
                roles.remove(Contributor.UNKNOWN_ROLE)
            roles.insert(0, Contributor.PRIMARY_AUTHOR_ROLE)

        contributor = None
        if existing_authors:
            # Calling Contributor.lookup will result in a database
            # hit, and looking up a contributor based on name may
            # result in multiple results (see below). We'll have no
            # way of distinguishing between those results. If
            # possible, it's much more reliable to look through
            # existing_authors (the authors derived from an entry's
            # <authors> tag).
            for known in existing_authors:
                if cls._contributor_match(known, author, lc, viaf):
                    contributor = known
                    break

        if not contributor:
            # The second return value (whether the contributor is new)
            # is not needed here.
            contributor, ignore = Contributor.lookup(_db,
                                                     author,
                                                     viaf,
                                                     lc,
                                                     extra=kwargs)
        if isinstance(contributor, list):
            # We asked for an author based solely on the name, which makes
            # Contributor.lookup() return a list.
            if len(contributor) == 1:
                # Fortunately, either the database knows about only
                # one author with that name, or it didn't know about
                # any authors with that name and it just created one,
                # so we can unambiguously use it.
                contributor = contributor[0]
            else:
                # Uh-oh. The database knows about multiple authors
                # with that name.  We have no basis for deciding which
                # author we mean. But we would prefer to identify with
                # an author who has a known LC or VIAF number.
                #
                # This should happen very rarely because of our check
                # against existing_authors above. But it will happen
                # for authors that have a work in Project Gutenberg.
                with_id = [
                    x for x in contributor
                    if x.lc is not None or x.viaf is not None
                ]
                if with_id:
                    contributor = with_id[0]
                else:
                    contributor = contributor[0]
        return contributor, roles, default_role_used