Exemplo n.º 1
0
    def test_it_logs_a_warning_if_document_ids_differ(self, log):
        """
        It should log a warning when the Document objects do not match.

        The mocked session returns an existing DocumentURI whose .document
        differs from the document passed to create_or_update_document_uri(),
        and exactly one call to log.warning is expected.

        """
        fake_session = mock_db_session()

        # The stored DocumentURI gets its own mock_document(), which is not
        # expected to compare equal to the document handed in below.
        stored_document_uri = mock.Mock(document=mock_document())
        first = fake_session.query.return_value.filter.return_value.first
        first.return_value = stored_document_uri

        given_document = mock_document()
        created_at = now()
        updated_at = now()

        document.create_or_update_document_uri(
            session=fake_session,
            claimant="http://example.com/example_claimant.html",
            uri="http://example.com/example_uri.html",
            type="self-claim",
            content_type=None,
            document=given_document,
            created=created_at,
            updated=updated_at,
        )

        assert log.warning.call_count == 1
Exemplo n.º 2
0
    def test_it_updates_the_existing_DocumentURI_if_there_is_one(self, db_session):
        """
        It should update the matching DocumentURI rather than add a new one.

        A DocumentURI is added to the session, then
        create_or_update_document_uri() is called with the same claimant, uri,
        type, content_type and document but new timestamps. The existing
        object must keep its created value, take the new updated value, and
        remain the only DocumentURI the query returns.

        """
        claimant_url = 'http://example.com/example_claimant.html'
        page_url = 'http://example.com/example_uri.html'
        uri_type = 'self-claim'
        mime = ''
        doc = document.Document()
        original_created = yesterday()
        original_updated = yesterday()

        existing = document.DocumentURI(
            claimant=claimant_url, uri=page_url, type=uri_type,
            content_type=mime, document=doc,
            created=original_created, updated=original_updated,
        )
        db_session.add(existing)

        second_call_time = now()
        document.create_or_update_document_uri(
            session=db_session,
            claimant=claimant_url, uri=page_url, type=uri_type,
            content_type=mime, document=doc,
            created=second_call_time, updated=second_call_time,
        )

        # created is preserved; only updated moves to the new timestamp.
        assert existing.created == original_created
        assert existing.updated == second_call_time
        all_rows = db_session.query(document.DocumentURI).all()
        assert len(all_rows) == 1, (
            "It shouldn't have added any new objects to the db")
Exemplo n.º 3
0
    def test_it_updates_the_existing_DocumentURI_if_there_is_one(self, db_session):
        """
        It should reuse an existing, matching DocumentURI.

        The second call passes the same identifying values as the stored
        DocumentURI, so no new object should appear in the db; the stored one
        keeps its created value and receives the new updated value.

        """
        # Values shared by the stored DocumentURI and the later call.
        shared = {
            'claimant': 'http://example.com/example_claimant.html',
            'uri': 'http://example.com/example_uri.html',
            'type': 'self-claim',
            'content_type': '',
            'document': document.Document(),
        }
        first_created = yesterday()
        first_updated = yesterday()
        stored = document.DocumentURI(
            **dict(shared, created=first_created, updated=first_updated))
        db_session.add(stored)

        later = now()
        document.create_or_update_document_uri(
            session=db_session, **dict(shared, created=later, updated=later))

        assert stored.created == first_created
        assert stored.updated == later
        document_uris = db_session.query(document.DocumentURI).all()
        assert len(document_uris) == 1, (
            "It shouldn't have added any new objects to the db")
Exemplo n.º 4
0
    def test_it_logs_a_warning_if_document_ids_differ(self, log):
        """
        It should warn once if the existing DocumentURI has another document.

        The session mock is primed so that query().filter().first() yields a
        DocumentURI whose .document is not the document given to
        create_or_update_document_uri().

        """
        db = mock_db_session()

        # Built from its own mock_document() call, so its .document is not
        # expected to equal the document passed in the call below.
        uri_in_db = mock.Mock(document=mock_document())
        first_result = db.query.return_value.filter.return_value.first
        first_result.return_value = uri_in_db

        call_kwargs = dict(
            session=db,
            claimant="http://example.com/example_claimant.html",
            uri="http://example.com/example_uri.html",
            type="self-claim",
            content_type=None,
            document=mock_document(),
            created=now(),
            updated=now(),
        )
        document.create_or_update_document_uri(**call_kwargs)

        warnings_logged = log.warning.call_count
        assert warnings_logged == 1
Exemplo n.º 5
0
    def test_it_skips_denormalizing_http_uris_to_document(
            self, db_session, doc_uri_attrs):
        """
        It should leave Document.web_uri unchanged.

        The document starts with a web_uri and, after
        create_or_update_document_uri() runs with the fixture's attributes,
        the document re-read from the db must still carry that same web_uri.

        """
        first_uri = "http://example.com/first_uri.html"
        doc = Document(web_uri=first_uri)
        doc_uri_attrs["document"] = doc
        db_session.add(doc)

        create_or_update_document_uri(session=db_session, **doc_uri_attrs)

        reloaded = db_session.query(Document).get(doc.id)
        assert reloaded.web_uri == first_uri
Exemplo n.º 6
0
    def test_raises_retryable_error_when_flush_fails(
        self, db_session, monkeypatch, doc_uri_attrs
    ):
        """
        It should raise ConcurrentUpdateError when flushing fails.

        db_session.flush is replaced with a function that raises
        sa.exc.IntegrityError.

        """
        def failing_flush():
            raise sa.exc.IntegrityError(None, None, None)

        monkeypatch.setattr(db_session, "flush", failing_flush)

        # no_autoflush prevents a premature IntegrityError.
        with pytest.raises(ConcurrentUpdateError), db_session.no_autoflush:
            create_or_update_document_uri(session=db_session, **doc_uri_attrs)
Exemplo n.º 7
0
    def test_it_logs_a_warning_if_document_ids_differ(
        self, log, mock_db_session, factories, doc_uri_attrs
    ):
        """
        It should log one warning when the documents differ.

        The mocked query().filter().first() returns a factory-built
        DocumentURI, while the call passes a separately built Document.

        """
        # The existing DocumentURI and the document we pass come from
        # separate factory calls, so they are meant to be different.
        existing_document_uri = factories.DocumentURI()
        first = mock_db_session.query.return_value.filter.return_value.first
        first.return_value = existing_document_uri
        other_document = factories.Document()
        call_attrs = dict(doc_uri_attrs, document=other_document)

        create_or_update_document_uri(session=mock_db_session, **call_attrs)

        assert log.warning.call_count == 1
Exemplo n.º 8
0
    def test_it_creates_a_new_DocumentURI_if_there_is_no_existing_one(
            self, db_session, doc_uri_attrs):
        """
        It should create a new DocumentURI when nothing in the db matches.

        The only DocumentURI already in the session differs in content_type.
        After the call, the DocumentURI with the latest created value must
        carry all of the attributes that were passed in.

        """
        created_at = datetime.now()
        updated_at = datetime.now()
        new_attrs = dict(doc_uri_attrs, created=created_at, updated=updated_at)

        # Add one non-matching DocumentURI to the database.
        non_matching = DocumentURI(
            **dict(doc_uri_attrs, content_type="different"))
        db_session.add(non_matching)

        create_or_update_document_uri(session=db_session, **new_attrs)

        newest_first = db_session.query(DocumentURI).order_by(
            DocumentURI.created.desc())
        newest = newest_first.first()
        assert newest == Any.object.with_attrs(new_attrs)
Exemplo n.º 9
0
    def test_it_updates_the_existing_DocumentURI_if_there_is_one(
            self, db_session, doc_uri_attrs):
        """
        It should update the existing DocumentURI instead of adding one.

        A DocumentURI built from the fixture attributes is added first. The
        call then passes the same attributes with fresh timestamps; created
        must stay as it was, updated must change, and the db must still hold
        a single DocumentURI.

        """
        new_created = datetime.now()
        new_updated = datetime.now()
        new_attrs = dict(
            doc_uri_attrs, created=new_created, updated=new_updated)
        existing = DocumentURI(**doc_uri_attrs)
        db_session.add(existing)

        create_or_update_document_uri(session=db_session, **new_attrs)

        assert existing.created == doc_uri_attrs["created"]
        assert existing.updated == new_updated
        stored = db_session.query(DocumentURI).all()
        assert len(stored) == 1, (
            "It shouldn't have added any new objects to the db")
Exemplo n.º 10
0
    def test_it_creates_a_new_DocumentURI_if_there_is_no_existing_one(self, db_session):
        """
        It should create a new DocumentURI when no stored one matches.

        The db is seeded with a DocumentURI that differs only in content_type.
        The DocumentURI with the latest created value must then carry the
        values passed to create_or_update_document_uri().

        """
        doc = document.Document()
        seeded_created = yesterday()
        seeded_updated = yesterday()
        expected = {
            "claimant": "http://example.com/example_claimant.html",
            "uri": "http://example.com/example_uri.html",
            "type": "self-claim",
            "content_type": "",
            "document": doc,
        }

        # Add one non-matching DocumentURI to the database. Its different
        # content_type means it should not match the query.
        non_matching = document.DocumentURI(
            **dict(
                expected,
                content_type="different",
                created=seeded_created,
                updated=seeded_updated,
            )
        )
        db_session.add(non_matching)

        document.create_or_update_document_uri(
            session=db_session, **dict(expected, created=now(), updated=now())
        )

        newest = (
            db_session.query(document.DocumentURI)
            .order_by(document.DocumentURI.created.desc())
            .first()
        )
        for attr_name, wanted in expected.items():
            assert getattr(newest, attr_name) == wanted
        assert newest.created > seeded_created
        assert newest.updated > seeded_updated
Exemplo n.º 11
0
    def test_it_skips_denormalizing_http_s_uri_to_document(self, db_session):
        """
        It should not overwrite the document's existing web_uri.

        The document already has a web_uri; a DocumentURI with a different
        http uri is created for it, and the document re-read from the db must
        still have the original web_uri.

        """
        first_uri = 'http://example.com/first_uri.html'
        doc = document.Document(web_uri=first_uri)
        db_session.add(doc)

        document.create_or_update_document_uri(
            session=db_session,
            claimant='http://example.com/example_claimant.html',
            uri='http://example.com/second_uri.html',
            type='self-claim',
            content_type='',
            document=doc,
            created=now(),
            updated=now(),
        )

        reloaded = db_session.query(document.Document).get(doc.id)
        assert reloaded.web_uri == first_uri
Exemplo n.º 12
0
    def test_it_skips_denormalizing_http_s_uri_to_document(self, db_session):
        """
        It should keep Document.web_uri as it was before the call.

        create_or_update_document_uri() is called with a second http uri for
        a document that already has a web_uri; the stored web_uri must not
        change.

        """
        original_web_uri = "http://example.com/first_uri.html"
        doc = document.Document(web_uri=original_web_uri)
        db_session.add(doc)

        call_kwargs = dict(
            session=db_session,
            claimant="http://example.com/example_claimant.html",
            uri="http://example.com/second_uri.html",
            type="self-claim",
            content_type="",
            document=doc,
            created=now(),
            updated=now(),
        )
        document.create_or_update_document_uri(**call_kwargs)

        from_db = db_session.query(document.Document).get(doc.id)
        assert from_db.web_uri == original_web_uri
Exemplo n.º 13
0
    def test_it_creates_a_new_DocumentURI_if_there_is_no_existing_one(
            self, db_session):
        """
        It should add a new DocumentURI when the stored one does not match.

        The stored DocumentURI differs from the call's values only in
        content_type. The DocumentURI with the latest created value is then
        checked against every value that was passed in.

        """
        claimant_url = "http://example.com/example_claimant.html"
        target_uri = "http://example.com/example_uri.html"
        uri_type = "self-claim"
        wanted_content_type = ""
        doc = document.Document()
        earlier_created = yesterday()
        earlier_updated = yesterday()

        # Add one non-matching DocumentURI to the database. Its different
        # content_type means it should not match the query.
        non_matching = document.DocumentURI(
            claimant=claimant_url, uri=target_uri, type=uri_type,
            content_type="different", document=doc,
            created=earlier_created, updated=earlier_updated,
        )
        db_session.add(non_matching)

        document.create_or_update_document_uri(
            session=db_session,
            claimant=claimant_url, uri=target_uri, type=uri_type,
            content_type=wanted_content_type, document=doc,
            created=now(), updated=now(),
        )

        by_newest = db_session.query(document.DocumentURI).order_by(
            document.DocumentURI.created.desc())
        newest = by_newest.first()
        assert newest.claimant == claimant_url
        assert newest.uri == target_uri
        assert newest.type == uri_type
        assert newest.content_type == wanted_content_type
        assert newest.document == doc
        assert newest.created > earlier_created
        assert newest.updated > earlier_updated
Exemplo n.º 14
0
    def test_raises_retryable_error_when_flush_fails(self, db_session, monkeypatch):
        """
        It should raise transaction.interfaces.TransientError if flush fails.

        db_session.flush is replaced with a function that raises
        sa.exc.IntegrityError.

        """
        doc = document.Document()

        def failing_flush():
            raise sa.exc.IntegrityError(None, None, None)

        monkeypatch.setattr(db_session, 'flush', failing_flush)

        expected_error = transaction.interfaces.TransientError
        # no_autoflush prevents a premature IntegrityError.
        with pytest.raises(expected_error), db_session.no_autoflush:
            document.create_or_update_document_uri(
                session=db_session,
                claimant='http://example.com',
                uri='http://example.org',
                type='rel-canonical',
                content_type='text/html',
                document=doc,
                created=now(),
                updated=now(),
            )
Exemplo n.º 15
0
    def test_it_creates_a_new_DocumentURI_if_there_is_no_existing_one(
            self, db_session):
        """
        It should create a new DocumentURI when no existing one matches.

        The db is seeded with a DocumentURI that differs only in content_type,
        so it should not match the query. After the call, the most recently
        created DocumentURI must carry the values that were passed in.

        """
        claimant = 'http://example.com/example_claimant.html'
        uri = 'http://example.com/example_uri.html'
        type_ = 'self-claim'
        content_type = ''
        document_ = document.Document()
        created = yesterday()
        updated = yesterday()

        # Add one non-matching DocumentURI to the database.
        db_session.add(
            document.DocumentURI(
                claimant=claimant,
                uri=uri,
                type=type_,
                # Different content_type means this DocumentURI should not match
                # the query.
                content_type='different',
                document=document_,
                created=created,
                updated=updated,
            ))

        document.create_or_update_document_uri(
            session=db_session,
            claimant=claimant,
            uri=uri,
            type=type_,
            content_type=content_type,
            document=document_,
            created=now(),
            updated=now(),
        )

        # Pick the newest row explicitly: a query without ORDER BY returns
        # rows in an unspecified order, so all()[-1] is not guaranteed to be
        # the DocumentURI that was just created.
        document_uri = (db_session.query(document.DocumentURI)
                        .order_by(document.DocumentURI.created.desc())
                        .first())
        assert document_uri.claimant == claimant
        assert document_uri.uri == uri
        assert document_uri.type == type_
        assert document_uri.content_type == content_type
        assert document_uri.document == document_
        assert document_uri.created > created
        assert document_uri.updated > updated
Exemplo n.º 16
0
    def test_raises_retryable_error_when_flush_fails(self, db_session, monkeypatch):
        """
        It should surface a failing flush as a TransientError.

        The session's flush is monkeypatched to raise sa.exc.IntegrityError
        and the call is expected to raise
        transaction.interfaces.TransientError.

        """
        def raise_integrity_error():
            raise sa.exc.IntegrityError(None, None, None)

        doc = document.Document()
        monkeypatch.setattr(db_session, 'flush', raise_integrity_error)

        with pytest.raises(transaction.interfaces.TransientError):
            # Prevent a premature IntegrityError from an autoflush.
            with db_session.no_autoflush:
                call_kwargs = dict(
                    session=db_session,
                    claimant='http://example.com',
                    uri='http://example.org',
                    type='rel-canonical',
                    content_type='text/html',
                    document=doc,
                    created=now(),
                    updated=now(),
                )
                document.create_or_update_document_uri(**call_kwargs)
Exemplo n.º 17
0
    def test_it_creates_a_new_DocumentURI_if_there_is_no_existing_one(self, db_session):
        """
        It should create a new DocumentURI when no existing one matches.

        A DocumentURI with a different content_type is added first, so it
        should not match the query. The most recently created DocumentURI is
        then checked against the values passed to
        create_or_update_document_uri().

        """
        claimant = 'http://example.com/example_claimant.html'
        uri = 'http://example.com/example_uri.html'
        type_ = 'self-claim'
        content_type = ''
        document_ = document.Document()
        created = yesterday()
        updated = yesterday()

        # Add one non-matching DocumentURI to the database.
        db_session.add(document.DocumentURI(
            claimant=claimant,
            uri=uri,
            type=type_,
            # Different content_type means this DocumentURI should not match
            # the query.
            content_type='different',
            document=document_,
            created=created,
            updated=updated,
        ))

        document.create_or_update_document_uri(
            session=db_session,
            claimant=claimant,
            uri=uri,
            type=type_,
            content_type=content_type,
            document=document_,
            created=now(),
            updated=now(),
        )

        # Order by created so the newest row is selected deterministically;
        # without ORDER BY the position of the new row in all() is
        # unspecified and all()[-1] may return the seeded row instead.
        document_uri = (
            db_session.query(document.DocumentURI)
            .order_by(document.DocumentURI.created.desc())
            .first()
        )
        assert document_uri.claimant == claimant
        assert document_uri.uri == uri
        assert document_uri.type == type_
        assert document_uri.content_type == content_type
        assert document_uri.document == document_
        assert document_uri.created > created
        assert document_uri.updated > updated
Exemplo n.º 18
0
    def test_raises_retryable_error_when_flush_fails(self, db_session,
                                                     monkeypatch):
        """
        It should raise ConcurrentUpdateError when the flush fails.

        db_session.flush is replaced with a function that raises
        sa.exc.IntegrityError.

        """
        doc = document.Document()

        def failing_flush():
            raise sa.exc.IntegrityError(None, None, None)

        monkeypatch.setattr(db_session, "flush", failing_flush)

        # no_autoflush prevents a premature IntegrityError.
        with pytest.raises(ConcurrentUpdateError), db_session.no_autoflush:
            document.create_or_update_document_uri(
                session=db_session,
                claimant="http://example.com",
                uri="http://example.org",
                type="rel-canonical",
                content_type="text/html",
                document=doc,
                created=now(),
                updated=now(),
            )