def test_it_merges_documents_when_duplicates_found(req):
    """Expect a single Document row after normalizing two documents' URIs."""
    # Arrange: two separate documents, each owning one DocumentURI.
    http_uri = models.DocumentURI(
        claimant='http://example.org/',
        uri='http://example.org/',
        type='self-claim',
    )
    https_uri = models.DocumentURI(
        claimant='https://example.net/',
        uri='https://example.org/',
        type='rel-canonical',
    )
    documents = [
        models.Document(document_uris=[http_uri]),
        models.Document(document_uris=[https_uri]),
    ]
    req.db.add_all(documents)
    req.db.flush()

    # Act
    normalize_uris.normalize_document_uris(req)

    # Assert: only one document is left in the session's database.
    remaining_documents = req.db.query(models.Document).count()
    assert remaining_documents == 1
def test_it_deletes_duplicate_document_meta_objects(req):
    """Expect one DocumentMeta row after normalizing two 'title' metas."""

    def title_meta(claimant, claimant_normalized):
        # Build the meta first, then overwrite the private normalized column
        # directly, exactly as the fixture data requires.
        meta = models.DocumentMeta(_claimant=claimant,
                                   type='title',
                                   value=['Test Title'])
        meta._claimant_normalized = claimant_normalized
        return meta

    http_meta = title_meta('http://example.org/', 'http://example.org')
    https_meta = title_meta('https://example.org/', 'https://example.org')

    documents = [
        models.Document(meta=[http_meta]),
        models.Document(meta=[https_meta]),
    ]
    req.db.add_all(documents)
    req.db.flush()

    normalize_uris.normalize_document_meta(req)

    remaining_meta = req.db.query(models.DocumentMeta).count()
    assert remaining_meta == 1
def _annotation(**kwargs):
    """Return a models.Annotation built from defaults overridden by kwargs.

    Any keyword argument replaces the default of the same name; keyword
    arguments without a default are passed through unchanged.
    """
    defaults = {
        'userid': 'acct:[email protected]',
        'created': datetime.utcnow(),
        'updated': datetime.utcnow(),
        'target_selectors': [],
        'document': models.Document(),
    }
    # Caller-supplied values win over the defaults.
    fields = dict(defaults, **kwargs)
    return models.Annotation(**fields)
class TestDocumentSearchIndexPresenter(object):
    """Tests for DocumentSearchIndexPresenter.asdict()."""

    # Each case pairs a document (or None) with the dict asdict() must return.
    _ASDICT_CASES = [
        (models.Document(title='Foo'),
         {'title': ['Foo']}),
        (models.Document(title=''),
         {}),
        (models.Document(title=None),
         {}),
        (models.Document(web_uri='http://foo.org'),
         {'web_uri': 'http://foo.org'}),
        (models.Document(web_uri=''),
         {}),
        (models.Document(web_uri=None),
         {}),
        (models.Document(title='Foo', web_uri='http://foo.org'),
         {'title': ['Foo'], 'web_uri': 'http://foo.org'}),
        (None,
         {}),
    ]

    @pytest.mark.parametrize('document,expected', _ASDICT_CASES)
    def test_asdict(self, document, expected):
        presenter = DocumentSearchIndexPresenter(document)
        assert expected == presenter.asdict()
def test_asdict(self, db_session):
    """asdict() of a titled document with two URIs yields only the title."""
    uris = [
        models.DocumentURI(uri='http://foo.com',
                           claimant='http://foo.com'),
        models.DocumentURI(uri='http://foo.org',
                           claimant='http://foo.com',
                           type='rel-canonical'),
    ]
    document = models.Document(title='Foo', document_uris=uris)
    db_session.add(document)
    db_session.flush()

    rendered = DocumentJSONPresenter(document).asdict()

    assert {'title': ['Foo']} == rendered
def test_asdict_does_not_render_other_meta_than_title(self, db_session):
    """Only the 'title' meta appears in asdict(); other meta types do not."""
    claimant = 'http://foo.com'
    meta = [
        models.DocumentMeta(type='title',
                            value=['Foo'],
                            claimant=claimant),
        models.DocumentMeta(type='twitter.url',
                            value=['http://foo.com'],
                            claimant=claimant),
        models.DocumentMeta(type='facebook.title',
                            value=['FB Title'],
                            claimant=claimant),
    ]
    document = models.Document(meta=meta)
    db_session.add(document)
    db_session.flush()

    rendered = DocumentJSONPresenter(document).asdict()

    assert {'title': ['Foo']} == rendered
def test_it_normalizes_document_meta_claimant(req):
    """Expect claimant_normalized to become 'httpx://…' on both metas."""
    org_meta = models.DocumentMeta(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        type='title',
        value=['Test Title'],
    )
    net_meta = models.DocumentMeta(
        _claimant='http://example.net/',
        _claimant_normalized='http://example.net',
        type='title',
        value=['Test Title'],
    )
    document = models.Document(meta=[org_meta, net_meta])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_meta(req)

    assert org_meta.claimant_normalized == 'httpx://example.org'
    assert net_meta.claimant_normalized == 'httpx://example.net'
def test_it_deletes_duplicate_document_uri_objects(req):
    """Expect one DocumentURI row after normalizing two on one document."""
    http_uri = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='http://example.org/',
        _uri_normalized='http://example.org',
        type='self-claim',
    )
    https_uri = models.DocumentURI(
        _claimant='https://example.org/',
        _claimant_normalized='https://example.org',
        _uri='https://example.org/',
        _uri_normalized='https://example.org',
        type='self-claim',
    )
    document = models.Document(document_uris=[http_uri, https_uri])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    remaining_uris = req.db.query(models.DocumentURI).count()
    assert remaining_uris == 1
def test_it_normalizes_document_uris_claimant(req):
    """Expect claimant_normalized to become 'httpx://example.org' on both."""
    self_claim = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='http://example.org/',
        _uri_normalized='http://example.org',
        type='self-claim',
    )
    canonical = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='https://example.org/',
        _uri_normalized='https://example.org',
        type='rel-canonical',
    )
    document = models.Document(document_uris=[self_claim, canonical])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    # Both URIs share the same claimant, so both expect the same value.
    for docuri in (self_claim, canonical):
        assert docuri.claimant_normalized == 'httpx://example.org'