Пример #1
0
    def test_it_calls_first(self, annotation, session, Document):
        """A lookup that counts exactly one document is resolved via first()."""
        # Stand-in for the lookup result, reporting a count of one.
        found_documents = mock.Mock(count=mock.Mock(return_value=1))
        Document.find_or_create_by_uris.return_value = found_documents

        document.update_document_metadata(session, annotation, [], [])

        found_documents.first.assert_called_once_with()
Пример #2
0
    def test_it_calls_first(self, annotation, session, Document):
        """When the lookup reports one document, first() is called once."""
        count_of_one = mock.Mock(return_value=1)
        lookup_result = mock.Mock(count=count_of_one)
        Document.find_or_create_by_uris.return_value = lookup_result

        document.update_document_metadata(session, annotation, [], [])

        # first() must have been invoked exactly once, without arguments.
        lookup_result.first.assert_called_once_with()
Пример #3
0
def make_anno(data, dbdocs):
    """Build an ``Annotation`` from ``data`` and set its ``document_id``.

    Args:
        data: Keyword arguments passed unchanged to ``models.Annotation``.
        dbdocs: Object indexed by the normalized target URI; each entry must
            expose an ``id`` attribute.

    Returns:
        The newly constructed annotation with ``document_id`` filled in.

    The document id is taken from ``dbdocs`` rather than by calling
    ``update_document_metadata`` per annotation: that call used to sit here
    as unreachable code after the return (marked "super slow") and has been
    removed.
    """
    # FIXME for batch the overhead here is stupid beyond belief
    annotation = models.Annotation(**data)

    normalized_uri = uri_normalize(annotation.target_uri)
    annotation.document_id = dbdocs[normalized_uri].id

    return annotation
Пример #4
0
    def test_it_updates_document_web_uri(self, annotation, Document, factories,
                                         session):
        """update_web_uri() is called once on the document that first() yields."""
        document_ = mock.Mock(web_uri=None)
        lookup_result = Document.find_or_create_by_uris.return_value
        lookup_result.count.return_value = 1
        lookup_result.first.return_value = document_

        document.update_document_metadata(
            session, annotation.target_uri, [], [],
            annotation.created, annotation.updated,
        )

        document_.update_web_uri.assert_called_once_with()
Пример #5
0
    def create_anno(self, row):
        """Validate ``row``, build its annotation and document, and commit.

        The ``id``, ``created`` and ``updated`` values are read from ``row``
        itself; every other annotation field comes from the validated datum.
        """
        datum = validate(row)

        # Remove the document payload so it is not passed to models.Annotation.
        doc_payload = datum.pop('document')
        uri_dicts = doc_payload['document_uri_dicts']
        meta_dicts = doc_payload['document_meta_dicts']

        anno_id = row['id']
        target_uri = datum['target_uri']
        created = row['created']
        updated = row['updated']

        annotation = models.Annotation(**datum)

        # TODO update normalization rules
        linked_document = update_document_metadata(
            self.session,
            target_uri,
            meta_dicts,
            uri_dicts,
            created=created,  # FIXME doesn't quite seem right, would klobber
            updated=updated,
        )

        print(anno_id)

        # Applied after construction, in this order, on top of the datum.
        overrides = (
            ('document', linked_document),
            ('id', anno_id),
            ('target_uri', target_uri),
            ('created', created),
            ('updated', updated),
        )
        for attr_name, attr_value in overrides:
            setattr(annotation, attr_name, attr_value)

        self.session.add(annotation)
        self.session.flush()
        self.session.commit()  # FIXME hypothesis doesn't call this
Пример #6
0
    def test_it_updates_document_updated(self, annotation, Document,
                                         merge_documents, session):
        """The document's ``updated`` ends up equal to the annotation's."""
        # Start from a value that differs from annotation.updated.
        stale_document = mock.Mock(updated="yesterday")
        merge_documents.return_value = stale_document
        Document.find_or_create_by_uris.return_value.first.return_value = stale_document

        document.update_document_metadata(
            session, annotation.target_uri, [], [],
            annotation.created, annotation.updated,
        )

        assert stale_document.updated == annotation.updated
Пример #7
0
    def test_it_updates_document_updated(
        self, annotation, Document, merge_documents, session
    ):
        """After the call, document.updated matches annotation.updated."""
        initial_updated = "yesterday"
        document_ = mock.Mock(updated=initial_updated)
        # The same mock is returned by both the merge and first() paths.
        merge_documents.return_value = document_
        lookup_result = Document.find_or_create_by_uris.return_value
        lookup_result.first.return_value = document_

        document.update_document_metadata(
            session,
            annotation.target_uri,
            [],
            [],
            annotation.created,
            annotation.updated,
        )

        assert document_.updated == annotation.updated
Пример #8
0
    def test_it_updates_document_web_uri(
        self, annotation, Document, factories, session
    ):
        """The document returned by first() gets update_web_uri() called once."""
        found = Document.find_or_create_by_uris.return_value
        found.count.return_value = 1
        target_document = mock.Mock(web_uri=None)
        found.first.return_value = target_document

        document.update_document_metadata(
            session,
            annotation.target_uri,
            [],
            [],
            annotation.created,
            annotation.updated,
        )

        target_document.update_web_uri.assert_called_once_with()
Пример #9
0
    def test_it_saves_all_the_document_uris(
        self, session, annotation, Document, create_or_update_document_uri
    ):
        """One create-or-update call is made per document URI dict."""
        lookup_result = Document.find_or_create_by_uris.return_value
        lookup_result.count.return_value = 1

        example_uris = (
            "http://example.com/example_1",
            "http://example.com/example_2",
            "http://example.com/example_3",
        )
        # The three dicts differ only in their "uri" value.
        document_uri_dicts = [
            {
                "uri": example_uri,
                "claimant": "http://example.com/claimant",
                "type": "type",
                "content_type": None,
            }
            for example_uri in example_uris
        ]

        document.update_document_metadata(
            session,
            annotation.target_uri,
            [],
            document_uri_dicts,
            annotation.created,
            annotation.updated,
        )

        assert create_or_update_document_uri.call_count == 3
        expected_document = lookup_result.first.return_value
        for doc_uri_dict in document_uri_dicts:
            create_or_update_document_uri.assert_any_call(
                session=session,
                document=expected_document,
                created=annotation.created,
                updated=annotation.updated,
                **doc_uri_dict
            )
Пример #10
0
    def test_it_saves_all_the_document_uris(self, session, annotation,
                                            Document,
                                            create_or_update_document_uri):
        """Every document URI dict is forwarded to create_or_update_document_uri."""
        Document.find_or_create_by_uris.return_value.count.return_value = 1

        claimant = "http://example.com/claimant"
        document_uri_dicts = [
            {"uri": "http://example.com/example_1", "claimant": claimant,
             "type": "type", "content_type": None},
            {"uri": "http://example.com/example_2", "claimant": claimant,
             "type": "type", "content_type": None},
            {"uri": "http://example.com/example_3", "claimant": claimant,
             "type": "type", "content_type": None},
        ]

        document.update_document_metadata(
            session, annotation.target_uri, [], document_uri_dicts,
            annotation.created, annotation.updated,
        )

        assert create_or_update_document_uri.call_count == 3
        # Keyword arguments shared by every expected call.
        first_document = (
            Document.find_or_create_by_uris.return_value.first.return_value)
        for uri_dict in document_uri_dicts:
            create_or_update_document_uri.assert_any_call(
                session=session,
                document=first_document,
                created=annotation.created,
                updated=annotation.updated,
                **uri_dict)
Пример #11
0
    def test_it_uses_the_target_uri_to_get_the_document(
        self, annotation, Document, session
    ):
        """find_or_create_by_uris gets the target URI plus every dict's URI."""
        expected_uris = [
            "http://example.com/example_1",
            "http://example.com/example_2",
            "http://example.com/example_3",
        ]
        document_uri_dicts = [
            {
                "uri": expected_uri,
                "claimant": "http://example.com/claimant",
                "type": "type",
                "content_type": None,
            }
            for expected_uri in expected_uris
        ]

        document.update_document_metadata(
            session,
            annotation.target_uri,
            [],
            document_uri_dicts,
            annotation.created,
            annotation.updated,
        )

        Document.find_or_create_by_uris.assert_called_once_with(
            session,
            annotation.target_uri,
            expected_uris,
            created=annotation.created,
            updated=annotation.updated,
        )
Пример #12
0
    def test_it_uses_the_target_uri_to_get_the_document(
            self, annotation, Document, session):
        """The lookup is called once with the target URI and the dicts' URIs."""
        claimant = "http://example.com/claimant"
        document_uri_dicts = [
            {"uri": "http://example.com/example_1", "claimant": claimant,
             "type": "type", "content_type": None},
            {"uri": "http://example.com/example_2", "claimant": claimant,
             "type": "type", "content_type": None},
            {"uri": "http://example.com/example_3", "claimant": claimant,
             "type": "type", "content_type": None},
        ]

        document.update_document_metadata(
            session, annotation.target_uri, [], document_uri_dicts,
            annotation.created, annotation.updated,
        )

        # URIs are expected in the same order as the dicts above.
        uris_in_order = [
            "http://example.com/example_1",
            "http://example.com/example_2",
            "http://example.com/example_3",
        ]
        Document.find_or_create_by_uris.assert_called_once_with(
            session,
            annotation.target_uri,
            uris_in_order,
            created=annotation.created,
            updated=annotation.updated,
        )
Пример #13
0
    def test_it_saves_all_the_document_metas(
        self, annotation, create_or_update_document_meta, Document, session
    ):
        """One create-or-update call is made per document meta dict."""
        lookup_result = Document.find_or_create_by_uris.return_value
        lookup_result.count.return_value = 1

        claimant = "http://example.com/claimant"
        document_meta_dicts = [
            {"claimant": claimant, "type": "title", "value": "foo"},
            {"type": "article title", "value": "bar", "claimant": claimant},
            {"type": "site title", "value": "gar", "claimant": claimant},
        ]

        document.update_document_metadata(
            session,
            annotation.target_uri,
            document_meta_dicts,
            [],
            annotation.created,
            annotation.updated,
        )

        assert create_or_update_document_meta.call_count == 3
        expected_document = lookup_result.first.return_value
        for meta_dict in document_meta_dicts:
            create_or_update_document_meta.assert_any_call(
                session=session,
                document=expected_document,
                created=annotation.created,
                updated=annotation.updated,
                **meta_dict
            )
Пример #14
0
    def test_it_saves_all_the_document_metas(self, annotation,
                                             create_or_update_document_meta,
                                             Document, session):
        """Every document meta dict is forwarded to create_or_update_document_meta."""
        Document.find_or_create_by_uris.return_value.count.return_value = 1

        shared_claimant = "http://example.com/claimant"
        title_meta = dict(claimant=shared_claimant, type="title", value="foo")
        article_meta = dict(type="article title", value="bar",
                            claimant=shared_claimant)
        site_meta = dict(type="site title", value="gar",
                         claimant=shared_claimant)
        document_meta_dicts = [title_meta, article_meta, site_meta]

        document.update_document_metadata(
            session, annotation.target_uri, document_meta_dicts, [],
            annotation.created, annotation.updated,
        )

        assert create_or_update_document_meta.call_count == 3
        first_document = (
            Document.find_or_create_by_uris.return_value.first.return_value)
        for document_meta_dict in document_meta_dicts:
            create_or_update_document_meta.assert_any_call(
                session=session,
                document=first_document,
                created=annotation.created,
                updated=annotation.updated,
                **document_meta_dict)
Пример #15
0
    def test_if_there_are_multiple_documents_it_merges_them_into_one(
            self, annotation, Document, merge_documents, session):
        """A lookup counting several documents is handed to merge_documents()."""
        # Stand-in for the lookup result, reporting a count of three.
        several_documents = mock.Mock(count=mock.Mock(return_value=3))
        Document.find_or_create_by_uris.return_value = several_documents

        document.update_document_metadata(
            session, annotation.target_uri, [], [],
            annotation.created, annotation.updated,
        )

        merge_documents.assert_called_once_with(
            session, several_documents, updated=annotation.updated)
Пример #16
0
    def test_if_there_are_multiple_documents_it_merges_them_into_one(
        self, annotation, Document, merge_documents, session
    ):
        """When more than one document is counted, merge_documents() is called."""
        count_of_three = mock.Mock(return_value=3)
        lookup_result = mock.Mock(count=count_of_three)
        Document.find_or_create_by_uris.return_value = lookup_result

        document.update_document_metadata(
            session,
            annotation.target_uri,
            [],
            [],
            annotation.created,
            annotation.updated,
        )

        # The whole lookup result is passed on, together with the new timestamp.
        merge_documents.assert_called_once_with(
            session,
            lookup_result,
            updated=annotation.updated,
        )
Пример #17
0
    def test_it_returns_a_document(
        self, annotation, create_or_update_document_meta, Document, session
    ):
        """The return value is whatever first() yields on the lookup result."""
        lookup_result = Document.find_or_create_by_uris.return_value
        lookup_result.count.return_value = 1

        returned = document.update_document_metadata(
            session,
            annotation.target_uri,
            [],
            [],
            annotation.created,
            annotation.updated,
        )

        assert returned == lookup_result.first.return_value
Пример #18
0
    def test_it_returns_a_document(self, annotation,
                                   create_or_update_document_meta, Document,
                                   session):
        """update_document_metadata() returns the document obtained via first()."""
        Document.find_or_create_by_uris.return_value.count.return_value = 1

        call_args = (
            session,
            annotation.target_uri,
            [],
            [],
            annotation.created,
            annotation.updated,
        )
        outcome = document.update_document_metadata(*call_args)

        expected_document = (
            Document.find_or_create_by_uris.return_value.first.return_value)
        assert outcome == expected_document
Пример #19
0
    def make_metadata(self, create, extracted, **kwargs):
        """Generate document metadata for this annotation and attach it.

        Nothing happens unless ``create`` is truthy, because the metadata
        objects are going to be added to the database.  ``extracted`` and
        ``kwargs`` are accepted but unused.
        """
        if not create:
            return

        def build_uri_dict():
            """
            Build one DocumentURI dict claimed by, and pointing at, the target URI.

            This doesn't add anything to the database session yet.
            """
            built_uri = DocumentURI.build(
                document=None, claimant=self.target_uri, uri=self.target_uri
            )
            return {
                "claimant": built_uri.claimant,
                "uri": built_uri.uri,
                "type": built_uri.type,
                "content_type": built_uri.content_type,
            }

        def build_meta_dict(type_=None):
            """
            Build one DocumentMeta dict, optionally forcing its ``type``.

            This doesn't add anything to the database session yet.
            """
            build_args = dict(document=None, claimant=self.target_uri)
            if type_ is not None:
                build_args["type"] = type_

            built_meta = DocumentMeta.build(**build_args)
            return {
                "claimant": built_meta.claimant,
                "type": built_meta.type,
                "value": built_meta.value,
            }

        # Between one and three of each; URI dicts are generated first.
        uri_count = random.randint(1, 3)
        document_uri_dicts = [build_uri_dict() for _ in range(uri_count)]

        meta_count = random.randint(1, 3)
        document_meta_dicts = [build_meta_dict() for _ in range(meta_count)]

        # Make sure that there's always at least one DocumentMeta with
        # type='title', so that we never get annotation.document.title is None:
        generated_types = [meta["type"] for meta in document_meta_dicts]
        if "title" not in generated_types:
            document_meta_dicts.append(build_meta_dict(type_="title"))

        self.document = update_document_metadata(
            orm.object_session(self),
            self.target_uri,
            document_meta_dicts=document_meta_dicts,
            document_uri_dicts=document_uri_dicts,
            created=self.created,
            updated=self.updated,
        )