def test_document_title_returns_first():
    """With several 'title' metas attached, ``title`` is the first one's value."""
    document = Document()

    # Attach two title claims; the US one is created first and is the one
    # the assertion below expects to win.
    title_claims = (
        ('http://example.com', 'The US Title'),
        ('http://example.co.uk', 'The UK Title'),
    )
    for claimant, title in title_claims:
        DocumentMeta(type='title',
                     value=title,
                     document=document,
                     claimant=claimant)

    db.Session.add(document)
    db.Session.flush()

    assert document.title == 'The US Title'
def test_document_title_meta_not_found():
    """A document whose only meta is not a 'title' has ``title`` of ``None``."""
    document = Document()
    # The only attached meta has type 'other', so no title is available.
    DocumentMeta(
        claimant='http://example.com',
        document=document,
        type='other',
        value='something',
    )

    db.Session.add(document)
    db.Session.flush()

    assert document.title is None
def create_or_update_document_meta(es_meta, pg_document):
    """Create or update the ``DocumentMeta`` row that corresponds to ``es_meta``.

    An existing row is looked up by ``claimant_normalized`` and ``type``.
    When none is found, a new ``DocumentMeta`` is built from ``es_meta``,
    attached to ``pg_document`` and added to ``Session``. When one is found,
    its ``value`` and ``updated`` fields are overwritten from ``es_meta``, and
    a warning is logged if that row belongs to a document other than
    ``pg_document``.

    :param es_meta: source object; its ``claimant``, ``claimant_normalized``,
        ``type``, ``value``, ``created`` and ``updated`` attributes are read.
    :param pg_document: the document the meta is expected to belong to.
    :returns: ``None``.

    NOTE(review): ``one_or_none()`` is presumably the SQLAlchemy query method,
    which raises when more than one row matches -- confirm against the model.
    """
    meta = DocumentMeta.query.filter(
        DocumentMeta.claimant_normalized == es_meta.claimant_normalized,
        DocumentMeta.type == es_meta.type).one_or_none()

    if meta is None:
        meta = DocumentMeta(claimant=es_meta.claimant,
                            type=es_meta.type,
                            value=es_meta.value,
                            created=es_meta.created,
                            updated=es_meta.updated,
                            document=pg_document)
        Session.add(meta)
    else:
        meta.value = es_meta.value
        meta.updated = es_meta.updated
        if not meta.document == pg_document:
            # Logging interpolates positional args with %-formatting, so the
            # placeholders must be '%d'; '{:d}' would make the record fail to
            # format and the mismatch would never be reported.
            log.warn(
                'Found DocumentMeta with id %d does not match expected '
                'document with id %d',
                meta.id, pg_document.id)
def create_or_update_document_meta(es_meta, pg_document):
    """Upsert the ``DocumentMeta`` row matching ``es_meta``.

    The lookup key is ``es_meta.claimant_normalized`` together with
    ``es_meta.normalized_type``. If no row matches, a new one is created from
    ``es_meta`` for ``pg_document`` and added to ``Session``. Otherwise the
    matched row's ``value`` and ``updated`` are refreshed, and a warning is
    logged when the row is attached to a document other than ``pg_document``.

    :param es_meta: source object supplying ``claimant``,
        ``claimant_normalized``, ``normalized_type``, ``value``, ``created``
        and ``updated``.
    :param pg_document: the document the meta is expected to belong to.
    :returns: ``None``.
    """
    existing = DocumentMeta.query.filter(
        DocumentMeta.claimant_normalized == es_meta.claimant_normalized,
        DocumentMeta.type == es_meta.normalized_type,
    ).one_or_none()

    # Nothing stored yet: build a fresh row and stop.
    if existing is None:
        created_meta = DocumentMeta(
            claimant=es_meta.claimant,
            type=es_meta.normalized_type,
            value=es_meta.value,
            created=es_meta.created,
            updated=es_meta.updated,
            document=pg_document,
        )
        Session.add(created_meta)
        return

    # A row already exists: refresh the mutable fields.
    existing.value = es_meta.value
    existing.updated = es_meta.updated

    if existing.document == pg_document:
        return

    # The stored row points at a different document than the caller expected.
    log.warn(
        'Found DocumentMeta with id %d does not match expected document with id %d',
        existing.id, pg_document.id)
def merge_data(request):
    """Persist two documents that claim the same URI and return them.

    ``master`` carries a 'self-claim' URI and ``duplicate`` a 'rel-canonical'
    URI, both pointing at the same Wikipedia main-page address; each also has
    a 'title' meta with the same value. Both are added to ``db.Session`` and
    flushed before being returned.

    :param request: not used by this function (presumably the pytest fixture
        request object -- confirm at the definition site).
    :returns: a ``(master, duplicate)`` tuple.
    """
    canonical_url = 'https://en.wikipedia.org/wiki/Main_Page'
    mobile_url = 'https://m.en.wikipedia.org/wiki/Main_Page'
    page_title = 'Wikipedia, the free encyclopedia'

    # The desktop page claims its own URI.
    master = Document(
        document_uris=[
            DocumentURI(claimant=canonical_url,
                        uri=canonical_url,
                        type='self-claim'),
        ],
        meta=[
            DocumentMeta(claimant=canonical_url,
                         type='title',
                         value=page_title),
        ],
    )

    # The mobile page points at the same URI via rel-canonical.
    duplicate = Document(
        document_uris=[
            DocumentURI(claimant=mobile_url,
                        uri=canonical_url,
                        type='rel-canonical'),
        ],
        meta=[
            DocumentMeta(claimant=mobile_url,
                         type='title',
                         value=page_title),
        ],
    )

    db.Session.add_all([master, duplicate])
    db.Session.flush()

    return master, duplicate