def test_function_step_with_context():
    """Run a two-argument function step and check that it sees the pipeline context.

    The step copies ``context.transaction_id`` onto the document metadata; the
    document that ends up in the sink store must carry that same id.
    """
    # NOTE(review): another ``test_function_step_with_context`` is defined later
    # in this file; if both live in one module the later one shadows this one.
    source_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    source_store.add(create_document())
    sink_store = JsonDocumentStore("/tmp/test-json-store2", force_initialize=True)

    # The sink starts empty, so the final count is attributable to the run.
    assert sink_store.count() == 0

    def my_function(doc, context):
        doc.metadata.cheese = context.transaction_id
        logging.error("Hello")
        return doc

    pipeline = Pipeline(source_store)
    pipeline.add_step(my_function)
    pipeline.set_sink(sink_store)

    run_context = pipeline.run()
    stats = run_context.statistics
    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0
    assert sink_store.count() == 1

    stored_document = sink_store.get_document(0)
    assert stored_document.metadata.cheese == pipeline.context.transaction_id

    # Dump the document log so it shows up in the captured test output.
    print(sink_store.get_document(0).log)
def test_function_step_with_exception():
    """Check that a raising step is recorded on the document instead of aborting.

    The pipeline is built with ``stop_on_exception=False``; the test expects the
    document to be counted as processed, counted as one exception, still written
    to the sink, and to carry exactly one entry in ``exceptions``.
    """
    # NOTE(review): another ``test_function_step_with_exception`` is defined
    # later in this file; if both live in one module the later one shadows this.
    document_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    document_store.add(create_document())
    new_document_store = JsonDocumentStore("/tmp/test-json-store2", force_initialize=True)

    def my_function(doc):
        doc.metadata.cheese = "fishstick"
        # Always fails; nothing after the raise can execute, so no return follows.
        raise Exception("hello world")

    assert new_document_store.count() == 0

    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_step(my_function)
    pipeline.set_sink(new_document_store)
    stats = pipeline.run().statistics

    assert stats.documents_processed == 1
    assert stats.document_exceptions == 1
    assert new_document_store.count() == 1
    assert len(new_document_store.get_document(0).exceptions) == 1

    # Dump the recorded exceptions so they show up in the captured test output.
    print(new_document_store.get_document(0).exceptions)
def test_pipeline_example():
    """Run a pipeline with no steps over a local store holding one document."""
    # NOTE(review): another ``test_pipeline_example`` is defined right after this
    # one; if both live in one module the later one shadows this one.
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())

    run_context = Pipeline(source_store).run()

    assert run_context.statistics.documents_processed == 1
def test_pipeline_example():
    """Run a step-less pipeline from one JSON store into another.

    One document is added to the source store; after the run the sink store,
    which starts empty, must contain exactly one document.
    """
    # NOTE(review): another ``test_pipeline_example`` is defined just before this
    # one; if both live in one module this definition shadows the earlier one.
    import shutil

    document_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    document_store.add(create_document())

    # Clear any sink left over from a previous run. The store path is a
    # directory, so it has to be removed as a tree; the previous guard looked
    # for "index..json" and called os.remove() on the directory, which raises
    # for directories.
    sink_path = Path("/tmp/test-json-store2")
    if sink_path.is_dir():
        shutil.rmtree(sink_path)

    new_document_store = JsonDocumentStore("/tmp/test-json-store2", force_initialize=True)
    assert new_document_store.count() == 0

    pipeline = Pipeline(document_store)
    pipeline.set_sink(new_document_store)
    stats = pipeline.run().statistics

    assert stats.documents_processed == 1
    assert new_document_store.count() == 1
def test_table_stores_with_extractor():
    """Check that a step can append a row to a table store registered on the pipeline."""
    # NOTE(review): another ``test_table_stores_with_extractor`` is defined right
    # after this one; if both live in one module the later one shadows this one.
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())

    pipeline = Pipeline(source_store, stop_on_exception=False)
    pipeline.add_store('output', TableDataStore(columns=['cheese']))

    def extractor(document, context):
        # Add a single-value row to the 'output' store each time the step runs.
        context.get_store('output').add(['test'])
        return document

    pipeline.add_step(extractor)
    run_context = pipeline.run()

    output_store = run_context.get_store('output')
    assert output_store.count() == 1
def test_table_stores_with_extractor():
    """Check that a step can append a row to a table store registered on the pipeline.

    The source is a JSON document store holding one document; after the run the
    'output' table store must contain exactly one row.
    """
    # NOTE(review): another ``test_table_stores_with_extractor`` is defined just
    # before this one; if both live in one module this definition shadows it.
    document_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    document_store.add(create_document())

    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_store('output', TableDataStore(columns=['cheese']))

    def extractor(document, context):
        # Add a single-value row to the 'output' store each time the step runs.
        context.get_store('output').add(['test'])
        return document

    pipeline.add_step(extractor)

    # The return value is not needed: the assertion reads the store through
    # ``pipeline.context``, so the previously unused local binding was dropped.
    pipeline.run()

    assert pipeline.context.get_store('output').count() == 1
def test_dict_stores_with_extractor():
    """Check that a step can append a dict record to a dict store on the pipeline.

    The source is a JSON document store holding one document; after the run the
    'output' dict store must contain exactly one entry.
    """
    document_store = JsonDocumentStore("/tmp/test-json-store", force_initialize=True)
    document_store.add(create_document())

    pipeline = Pipeline(document_store, stop_on_exception=False)
    pipeline.add_store('output', DictDataStore())

    def extractor(document, context):
        # An example of how we might extract into a dict.
        context.get_store('output').add({'cheese': 'test'})
        return document

    pipeline.add_step(extractor)

    # The run statistics were bound to a local that was never read; only the
    # contents of the 'output' store are checked here.
    pipeline.run()

    assert pipeline.context.get_store('output').count() == 1
def test_function_step_with_context():
    """Run a two-argument function step and check that it sees the pipeline context.

    The step copies ``context.execution_id`` onto the document metadata; a
    ``DocumentStoreWriter`` step then writes the document to a second local
    store, where the stored metadata is compared with the pipeline's id.
    """
    # NOTE(review): another ``test_function_step_with_context`` is defined
    # earlier in this file; if both live in one module this one shadows it.
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())
    target_store = LocalDocumentStore()

    # The target starts empty, so the final count is attributable to the run.
    assert target_store.count() == 0

    def my_function(doc, context):
        doc.metadata.cheese = context.execution_id
        logging.error("Hello")
        return doc

    pipeline = Pipeline(source_store)
    pipeline.add_step(my_function)
    pipeline.add_step(DocumentStoreWriter(target_store))

    run_context = pipeline.run()
    stats = run_context.statistics
    assert stats.documents_processed == 1
    assert stats.document_exceptions == 0
    assert target_store.count() == 1

    written_document = target_store.get_latest_document("test.doc")
    assert written_document.metadata.cheese == pipeline.context.execution_id
def test_function_step_with_exception():
    """Check that a raising step is recorded on the document instead of aborting.

    The pipeline is built with ``stop_on_exception=False``; the test expects the
    document to be counted as processed, counted as one exception, still written
    by the ``DocumentStoreWriter`` step, and to carry one entry in ``exceptions``.
    """
    # NOTE(review): another ``test_function_step_with_exception`` is defined
    # earlier in this file; if both live in one module this one shadows it.
    source_store = LocalDocumentStore()
    source_store.put("test.doc", create_document())
    target_store = LocalDocumentStore()

    # The target starts empty, so the final count is attributable to the run.
    assert target_store.count() == 0

    def my_function(doc):
        doc.metadata.cheese = "fishstick"
        raise Exception("hello world")

    pipeline = Pipeline(source_store, stop_on_exception=False)
    pipeline.add_step(my_function)
    pipeline.add_step(DocumentStoreWriter(target_store))

    run_context = pipeline.run()
    stats = run_context.statistics
    assert stats.documents_processed == 1
    assert stats.document_exceptions == 1
    assert target_store.count() == 1

    written_document = target_store.get_latest_document("test.doc")
    assert len(written_document.exceptions) == 1
def test_class_step_step_with_context():
    """Run a class-based step and check that it sees the pipeline context.

    The step object exposes ``get_name`` and ``process(doc, context)``; its
    ``process`` copies ``context.execution_id`` onto the document metadata, and
    a ``DocumentStoreWriter`` step writes the result to a second local store.
    """
    source_store = LocalDocumentStore()
    source_store.put('test.doc', create_document())
    target_store = LocalDocumentStore()

    class MyProcessingStep:
        def get_name(self):
            return "test-step"

        def process(self, doc, context):
            doc.metadata.cheese = context.execution_id
            logging.error("Hello")
            return doc

    pipeline = Pipeline(source_store)
    pipeline.add_step(MyProcessingStep())
    pipeline.add_step(DocumentStoreWriter(target_store))

    run_context = pipeline.run()
    assert run_context.statistics.documents_processed == 1
    assert run_context.statistics.document_exceptions == 0

    written_document = target_store.get_latest_document("test.doc")
    assert written_document.metadata.cheese == pipeline.context.execution_id