def merge_data(self, db_session, request):
    """Persist a master document and one duplicate sharing the same URI.

    The master makes a ``self-claim`` on the Wikipedia main page URI, while
    the duplicate (claimed by the mobile site) points at that same URI with
    a ``rel-canonical`` claim. Each document also gets a ``title`` meta
    entry with the same value.

    ``request`` is accepted but not used by this function.

    Returns the ``(master, duplicate)`` tuple after both documents have been
    added to ``db_session`` and the session has been flushed.
    """
    main_page_url = 'https://en.wikipedia.org/wiki/Main_Page'
    mobile_page_url = 'https://m.en.wikipedia.org/wiki/Main_Page'
    page_title = 'Wikipedia, the free encyclopedia'

    master_uri = document.DocumentURI(
        claimant=main_page_url, uri=main_page_url, type='self-claim')
    master_meta = document.DocumentMeta(
        claimant=main_page_url, type='title', value=page_title)
    master = document.Document(
        document_uris=[master_uri], meta=[master_meta])

    # The duplicate is claimed by the mobile page but targets the main page.
    duplicate_uri = document.DocumentURI(
        claimant=mobile_page_url, uri=main_page_url, type='rel-canonical')
    duplicate_meta = document.DocumentMeta(
        claimant=mobile_page_url, type='title', value=page_title)
    duplicate = document.Document(
        document_uris=[duplicate_uri], meta=[duplicate_meta])

    db_session.add_all([master, duplicate])
    db_session.flush()

    return (master, duplicate)
def merge_data(self, db_session, request):
    """Persist a master document, two duplicates and two annotations each.

    All three documents claim the Wikipedia main page URI: the master with a
    ``self-claim`` and the duplicates with ``rel-canonical`` claims made by
    other pages. Every document also has a ``title`` meta entry with the
    same value.

    The documents are added and flushed first; the six annotations are then
    added to ``db_session`` without a further flush.

    ``request`` is accepted but not used by this function.

    Returns the ``(master, duplicate_1, duplicate_2)`` tuple.
    """
    main_page_url = 'https://en.wikipedia.org/wiki/Main_Page'

    def build_document(claimant, uri_type):
        # Only the claimant and the kind of URI claim vary between the
        # documents; the claimed URI and the title are always the same.
        return document.Document(
            document_uris=[
                document.DocumentURI(
                    claimant=claimant,
                    uri=main_page_url,
                    type=uri_type),
            ],
            meta=[
                document.DocumentMeta(
                    claimant=claimant,
                    type='title',
                    value='Wikipedia, the free encyclopedia'),
            ])

    master = build_document(main_page_url, 'self-claim')
    duplicate_1 = build_document(
        'https://m.en.wikipedia.org/wiki/Main_Page', 'rel-canonical')
    duplicate_2 = build_document(
        'https://en.wikipedia.org/wiki/Home', 'rel-canonical')

    db_session.add_all([master, duplicate_1, duplicate_2])
    # Flush before the document ids are read below (presumably the ids are
    # only assigned once the rows have been flushed -- confirm).
    db_session.flush()

    userids_per_document = (
        (master, ('luke', 'alice')),
        (duplicate_1, ('lucy', 'bob')),
        (duplicate_2, ('amy', 'dan')),
    )
    db_session.add_all([
        models.Annotation(userid=userid, document_id=doc.id)
        for doc, userids in userids_per_document
        for userid in userids
    ])

    return (master, duplicate_1, duplicate_2)
def test_it_returns_the_value_of_the_first_title_DocumentMeta(
        self, db_session):
    """With two title metas, ``title`` should be the first one's value."""
    doc = document.Document()

    # Order matters: the US title is attached to the document first.
    titles_by_claimant = (
        ('http://example.com', 'The US Title'),
        ('http://example.co.uk', 'The UK Title'),
    )
    for claimant_url, title_text in titles_by_claimant:
        document.DocumentMeta(
            type='title',
            value=[title_text],
            document=doc,
            claimant=claimant_url)

    db_session.add(doc)
    db_session.flush()

    assert doc.title == 'The US Title'
def test_it_updates_an_existing_DocumentMeta_if_there_is_one(self, db_session):
    """An existing meta with the same claimant and type is updated in place.

    The second call passes a different document and ``created`` value; the
    assertions check that only ``value`` and ``updated`` change and that no
    extra ``DocumentMeta`` row appears.
    """
    claimant = 'http://example.com/claimant'
    type_ = 'title'
    original_document = document.Document()
    original_created = yesterday()
    original_updated = now()

    existing_meta = document.DocumentMeta(
        claimant=claimant,
        type=type_,
        value='the title',
        document=original_document,
        created=original_created,
        updated=original_updated,
    )
    db_session.add(existing_meta)

    new_updated = now()

    document.create_or_update_document_meta(
        session=db_session,
        claimant=claimant,
        type=type_,
        value='new value',
        document=document.Document(),  # This should be ignored.
        created=now(),  # This should be ignored.
        updated=new_updated,
    )

    assert existing_meta.value == 'new value'
    assert existing_meta.updated == new_updated
    assert existing_meta.created == original_created, (
        "It shouldn't update created")
    assert existing_meta.document == original_document, (
        "It shouldn't update document")

    all_metas = db_session.query(document.DocumentMeta).all()
    assert len(all_metas) == 1, (
        "It shouldn't have added any new objects to the db")
def test_it_returns_None_if_there_are_no_title_DocumentMetas(
        self, db_session):
    """A document whose only meta is not a title should have no title."""
    doc = document.Document()

    # Attach a single meta of a non-title type to the document.
    document.DocumentMeta(
        type='other',
        value='something',
        document=doc,
        claimant='http://example.com',
    )

    db_session.add(doc)
    db_session.flush()

    assert doc.title is None
def test_it_returns_the_value_of_the_one_title_DocumentMeta(
        self, db_session):
    """A document with a single title meta should report that title."""
    doc = document.Document()

    # The value is given as a one-element list; the title is its element.
    document.DocumentMeta(
        type='title',
        value=['The Title'],
        document=doc,
        claimant='http://example.com',
    )

    db_session.add(doc)
    db_session.flush()

    assert doc.title == 'The Title'
def test_it_creates_a_new_DocumentMeta_if_there_is_no_existing_one(
        self, db_session):
    """A new meta is created when none matches the claimant and type.

    A meta with the same claimant but a different type is added first; the
    test then checks that the last ``DocumentMeta`` returned by the query
    carries exactly the values passed to
    ``create_or_update_document_meta``.
    """
    claimant = 'http://example.com/claimant'
    type_ = 'title'
    value = 'the title'
    document_ = document.Document()
    created = yesterday()
    updated = now()

    # Add one non-matching DocumentMeta to the database.
    # This should be ignored.
    non_matching_meta = document.DocumentMeta(
        claimant=claimant,
        # Different type means this should not match the query.
        type='different',
        value=value,
        document=document_,
        created=created,
        updated=updated,
    )
    db_session.add(non_matching_meta)

    document.create_or_update_document_meta(
        session=db_session,
        claimant=claimant,
        type=type_,
        value=value,
        document=document_,
        created=created,
        updated=updated,
    )

    # NOTE(review): this takes the last row of an unordered query and
    # presumably relies on it being the most recently inserted one.
    all_metas = db_session.query(document.DocumentMeta).all()
    document_meta = all_metas[-1]

    assert document_meta.claimant == claimant
    assert document_meta.type == type_
    assert document_meta.value == value
    assert document_meta.document == document_
    assert document_meta.created == created
    assert document_meta.updated == updated