def reset():
    """
    Reset the index.

    Takes no arguments and returns nothing; the work is delegated
    entirely to ``Document_Index.es_reset()``.
    """
    Document_Index.es_reset()
def delete():
    """
    Delete the index.

    Takes no arguments and returns nothing; the work is delegated
    entirely to ``Document_Index.es_delete()``.
    """
    Document_Index.es_delete()
def create():
    """
    Create the index.

    Takes no arguments and returns nothing; the work is delegated
    entirely to ``Document_Index.es_create()``.
    """
    Document_Index.es_create()
def test_matches(add_doc, add_text):
    """
    When documents match the query, write doc -> text rows.

    Three documents mention the text's title and author; two more share
    only the author. After ``text_to_docs`` runs, exactly three citation
    rows should exist, one per matching document.

    Args:
        add_doc: Callable that creates a document from ``content=...``
            (presumably a pytest fixture -- defined outside this block).
        add_text: Callable that creates a text from ``title=...`` and
            ``surname=...`` (presumably a pytest fixture).
    """
    # Documents that should be linked to the text.
    matching_docs = [
        add_doc(content='War and Peace, Leo Tolstoy 1'),
        add_doc(content='War and Peace, Leo Tolstoy 2'),
        add_doc(content='War and Peace, Leo Tolstoy 3'),
    ]

    # Same author, different title: these must be ignored.
    add_doc(content='Anna Karenina, Leo Tolstoy 1')
    add_doc(content='Anna Karenina, Leo Tolstoy 2')

    Document_Index.es_insert()

    text = add_text(title='War and Peace', surname='Tolstoy')
    text_to_docs(text.id)

    # Should write 3 citation links.
    assert Citation.select().count() == 3

    # Should match "War and Peace," ignore "Anna Karenina".
    for doc in matching_docs:
        # Ask the database explicitly whether a matching row exists rather
        # than relying on the truthiness of the query object.
        assert Citation.select().where(
            Citation.text == text,
            Citation.document == doc,
            Citation.tokens.contains(text.hash_tokens),
        ).exists()
def insert():
    """
    Index documents.

    Takes no arguments and returns nothing; the work is delegated
    entirely to ``Document_Index.es_insert()``.
    """
    Document_Index.es_insert()
def test_no_matches(add_doc, add_text):
    """
    When no documents match, don't write any rows.

    The only indexed document mentions a different title than the text
    being queried, so the citation table must stay empty.
    """
    add_doc(content='War and Peace, Leo Tolstoy')
    Document_Index.es_insert()

    unmatched = add_text(title='Master and Man', surname='Tolstoy')
    text_to_docs(unmatched.id)

    # Shouldn't write any rows.
    citation_count = Citation.select().count()
    assert citation_count == 0
def test_es_insert(add_doc):
    """
    Document_Index.es_insert() should index the document body and id.

    Creates one document, runs the insert, then fetches the record back
    by the document's id and checks the stored body.
    """
    doc = add_doc(content='text')
    Document_Index.es_insert()

    # Look the record up by the document's own id.
    lookup = dict(index='document', id=doc.id)
    indexed = config.es.get(**lookup)

    stored_body = indexed['_source']['body']
    assert stored_body == 'text'
def count():
    """
    Count documents.

    Echoes the value returned by ``Document_Index.es_count()`` using
    ``click.echo``.
    """
    total = Document_Index.es_count()
    click.echo(total)
def test_citation_formats(title, surname, content, add_doc, add_text):
    """
    Test title/author -> citation formats.

    The citation ``content`` is buried between 1000 filler tokens on each
    side, indexed, and then matched against a text built from ``title``
    and ``surname``. A citation row linking the two must exist.

    Args:
        title: Title given to the text.
        surname: Author surname given to the text.
        content: Citation string embedded in the document body.
            (``title``/``surname``/``content`` are presumably supplied by
            a parametrize decorator not visible in this block.)
        add_doc: Callable that creates a document from ``content=...``.
        add_text: Callable that creates a text from ``title=...`` and
            ``surname=...``.
    """
    # Pad tokens around the match.
    padded = ('XXX ' * 1000) + content + (' XXX' * 1000)

    doc = add_doc(content=padded)
    Document_Index.es_insert()

    text = add_text(title=title, surname=surname)
    text_to_docs(text.id)

    # Ask the database explicitly whether the citation row exists rather
    # than relying on the truthiness of the query object.
    assert Citation.select().where(
        Citation.text == text,
        Citation.document == doc,
        Citation.tokens.contains(text.hash_tokens),
    ).exists()