def test_initial_artifact_hash():
    """Check that an InitialArtifact's hashstring differs after its source file is rewritten."""
    with temprun() as runner:
        source_path = "source.txt"
        texts = ("hello this is some text", "hello this is different text")
        hashstrings = []

        for position, text in enumerate(texts):
            if position > 0:
                time.sleep(1.1)  # make sure mtime is at least 1 second different

            with open(source_path, "w") as handle:
                handle.write(text)

            artifact = InitialArtifact(source_path, runner=runner)
            artifact.name = source_path
            artifact.run()
            hashstrings.append(artifact.hashstring)

        assert hashstrings[0] != hashstrings[1]
def test_split_html_filter():
    """Run the splithtml filter and check the index page and both split-off child pages."""
    with temprun() as runner:
        html = """ <p>This is at the top.</p> <!-- split "a-page" --> some content on a page <!-- split "another-page" --> some content on another page <!-- endsplit --> bottom """
        doc = Doc("example.html|splithtml", contents=html, runner=runner)
        runner.docs = [doc]
        runner.run()

        # Children 2 and 3 are the pages created by the filter.
        for index, expected_key in ((2, "a-page.html"), (3, "another-page.html")):
            assert doc.children[index].key == expected_key

        # The parent document keeps the header/footer and links to each page.
        od = doc.output().data()
        for fragment in (
            "<p>This is at the top.</p>",
            '<a href="a-page.html">',
            '<a href="another-page.html">',
            "bottom",
        ):
            assert fragment in od

        # Each page carries the shared header, its own body and the footer.
        page_bodies = (
            (2, "some content on a page"),
            (3, "some content on another page"),
        )
        for index, body in page_bodies:
            for fragment in ("<p>This is at the top.</p>", body, "bottom"):
                assert fragment in doc.children[index].output().data()
def test_add_task():
    """Insert a mocked task with add_task_before_running and check the stored row and next batch id."""
    with temprun() as runner:
        task_attrs = {
            "args": {},
            "doc.key": "abc23456",
            "key_with_batch_id.return_value": "def1234556",
            "runner.batch_id": 1001,
            "state": "running",
            "created_by_doc": None,
            "key": "file.txt",
        }
        task = MagicMock(**task_attrs)

        runner.db.add_task_before_running(task)
        runner.db.conn.commit()

        runner.db.cursor.execute("select * from tasks")
        row = runner.db.cursor.fetchone()

        expected_columns = (
            ("batch_id", 1001),
            ("key", "file.txt"),
            ("class_name", "MagicMock"),
        )
        for column, expected in expected_columns:
            assert row[column] == expected

        assert row['started_at'] < datetime.now()
        assert not row['created_by_doc']

        assert runner.db.get_next_batch_id() == 1002
def test_doc_setup():
    """Check the key, name, filters and child artifact chain of a freshly built Doc.

    The doc is only constructed here; it is never run.
    """
    with temprun() as runner:
        doc = Doc("abc.txt|dexy|dexy", runner=runner)

        assert doc.key == "abc.txt|dexy|dexy"
        assert doc.name == "abc.txt"
        assert doc.filters == ["dexy", "dexy"]

        # One child per stage: the initial artifact, then one per filter.
        assert doc.children[0].key == "abc.txt"
        assert doc.children[1].key == "abc.txt|dexy"
        assert doc.children[2].key == "abc.txt|dexy|dexy"

        assert doc.children[0].__class__.__name__ == "InitialVirtualArtifact"
        assert doc.children[1].__class__.__name__ == "FilterArtifact"
        assert doc.children[2].__class__.__name__ == "FilterArtifact"

        # Only filter artifacts expose next_filter_alias; the last has none.
        assert not hasattr(doc.children[0], 'next_filter_alias')
        assert doc.children[1].next_filter_alias == "dexy"
        assert doc.children[2].next_filter_alias is None

        # Each artifact's prior is the artifact of the preceding stage.
        assert not doc.children[0].prior
        assert doc.children[1].prior.__class__.__name__ == "InitialVirtualArtifact"
        assert doc.children[2].prior.__class__.__name__ == "FilterArtifact"

        assert doc.children[1].prior.key == "abc.txt"
        assert doc.children[2].prior.key == "abc.txt|dexy"
def test_split_html_filter():
    """Split an HTML document with splithtml and verify the parent and page outputs."""
    with temprun() as runner:
        source_html = (
            ' <p>This is at the top.</p>'
            ' <!-- split "a-page" --> some content on a page'
            ' <!-- split "another-page" --> some content on another page'
            ' <!-- endsplit --> bottom '
        )
        doc = Doc("example.html|splithtml", contents=source_html, runner=runner)
        runner.docs = [doc]
        runner.run()

        header = "<p>This is at the top.</p>"
        footer = "bottom"

        assert doc.children[2].key == "a-page.html"
        assert doc.children[3].key == "another-page.html"

        # Index page: shared header/footer plus a link per split page.
        od = doc.output().data()
        expected_in_index = (
            header,
            '<a href="a-page.html">',
            '<a href="another-page.html">',
            footer,
        )
        for snippet in expected_in_index:
            assert snippet in od

        # Split pages: shared header, page-specific text, shared footer.
        expected_in_pages = {
            2: (header, "some content on a page", footer),
            3: (header, "some content on another page", footer),
        }
        for child_index in (2, 3):
            for snippet in expected_in_pages[child_index]:
                assert snippet in doc.children[child_index].output().data()
def test_create_virtual_initial_artifact_with_dict():
    """Check that OrderedDict contents give the first child SectionedData output."""
    with temprun() as runner:
        sections = OrderedDict()
        sections['1'] = "these are the contents"

        doc = Doc("abc.txt", contents=sections, runner=runner)
        runner.run_tasks(doc)

        output_class_name = doc.children[0].output_data.__class__.__name__
        assert output_class_name == "SectionedData"
def test_doc_setup():
    """Verify what Doc builds at construction time for the key "abc.txt|dexy|dexy".

    Covers the doc's key/name/filters and, for each of its three children,
    the key, class name, next_filter_alias and prior artifact. The doc is
    not run.
    """
    with temprun() as runner:
        doc = Doc("abc.txt|dexy|dexy", runner=runner)

        assert doc.key == "abc.txt|dexy|dexy"
        assert doc.name == "abc.txt"
        assert doc.filters == ["dexy", "dexy"]

        expected_children = (
            ("abc.txt", "InitialVirtualArtifact"),
            ("abc.txt|dexy", "FilterArtifact"),
            ("abc.txt|dexy|dexy", "FilterArtifact"),
        )
        for index, (key, _class_name) in enumerate(expected_children):
            assert doc.children[index].key == key
        for index, (_key, class_name) in enumerate(expected_children):
            assert doc.children[index].__class__.__name__ == class_name

        # The initial artifact has no next_filter_alias attribute at all;
        # the final filter artifact has one, set to None.
        assert not hasattr(doc.children[0], 'next_filter_alias')
        assert doc.children[1].next_filter_alias == "dexy"
        assert doc.children[2].next_filter_alias is None

        # prior links each artifact back to the previous stage.
        assert not doc.children[0].prior
        assert doc.children[1].prior.__class__.__name__ == "InitialVirtualArtifact"
        assert doc.children[2].prior.__class__.__name__ == "FilterArtifact"
        assert doc.children[1].prior.key == "abc.txt"
        assert doc.children[2].prior.key == "abc.txt|dexy"
def test_access_other_documents(): with temprun() as runner: doc = Doc("hello.txt|newdoc", contents="hello", runner=runner) parent = Doc("test.txt|others", doc, contents="hello", runner=runner) runner.docs = [parent] runner.run() assert parent.output().data() == """Here is a list of previous docs in this tree (not including test.txt|others).
def test_update_task():
    """Store a mocked task, update it after running, and check the persisted hashstring.

    NOTE(review): the mock's state stays "running" throughout; only the
    hashstring column is asserted after update_task_after_running.
    """
    with temprun() as runner:
        attrs = {
            "args": {},
            "doc.key": "abc23456",
            "key_with_batch_id.return_value": "def1234556",
            "runner.batch_id": 1001,
            "hashstring": "abc123001",
            "state": "running",
            "created_by_doc": None,
            "key": "file.txt",
        }
        task = MagicMock(**attrs)

        runner.db.add_task_before_running(task)
        runner.db.conn.commit()

        runner.db.update_task_after_running(task)

        sql = """select * from tasks"""
        runner.db.cursor.execute(sql)
        row = runner.db.cursor.fetchone()

        assert row['hashstring'] == 'abc123001'
def test_create_virtual_initial_artifact_with_dict():
    """Build a Doc from an OrderedDict and check its first child's output_data is SectionedData."""
    with temprun() as runner:
        contents_by_section = OrderedDict()
        contents_by_section['1'] = "these are the contents"

        doc = Doc("abc.txt", contents=contents_by_section, runner=runner)
        runner.run_tasks(doc)

        first_child = doc.children[0]
        assert first_child.output_data.__class__.__name__ == "SectionedData"
def test_access_other_documents(): with temprun() as runner: doc = Doc("hello.txt|newdoc", contents="hello", runner=runner) parent = Doc("test.txt|others", doc, contents="hello", runner=runner) runner.docs = [parent] runner.run() assert parent.output().data( ) == """Here is a list of previous docs in this tree (not including test.txt|others).
def test_output_reporter():
    """Run a single doc, report with OutputReporter, and check the output paths exist."""
    with temprun() as runner:
        doc = Doc("hello.txt", contents="hello", runner=runner)
        reporter = OutputReporter()

        runner.docs = [doc]
        runner.run()
        runner.report(reporter)

        for expected_path in ("output", "output/hello.txt"):
            assert os.path.exists(expected_path)
def test_pattern_doc_args():
    """Check that keyword args given to a PatternDoc also appear on its matched child Doc."""
    with temprun() as runner:
        with open("hello.txt", "w") as handle:
            handle.write("hello!")

        pattern_doc = PatternDoc("*.txt", foo="bar", runner=runner)
        assert pattern_doc.args['foo'] == 'bar'

        matched = pattern_doc.children[0]
        assert isinstance(matched, Doc)
        assert matched.key == "hello.txt"
        assert matched.args['foo'] == 'bar'
def test_virtual_artifact():
    """Run an InitialVirtualArtifact and check its output data is cached and matches the contents."""
    with temprun() as runner:
        artifact_name = "abc.txt"
        artifact = InitialVirtualArtifact(
            artifact_name,
            contents="these are the contents",
            runner=runner,
        )
        artifact.name = artifact_name
        artifact.run()

        assert artifact.output_data.is_cached()
        assert artifact.output_data.data() == "these are the contents"
def test_doc_children_artifacts():
    """Check children and artifacts of a nested doc pair before and after the runner runs."""
    with temprun() as runner:
        doc = Doc("hello.txt|newdoc", contents="hello", runner=runner)
        parent = Doc("parent.txt|process", doc, contents="hello", runner=runner)
        runner.docs = [parent]

        artifact_types = (InitialVirtualArtifact, FilterArtifact)

        # Before running: the inner doc has just its two artifacts.
        assert len(doc.children) == 2
        for index, expected_type in enumerate(artifact_types):
            assert isinstance(doc.children[index], expected_type)

        assert len(doc.artifacts) == 2
        for index, expected_type in enumerate(artifact_types):
            assert isinstance(doc.artifacts[index], expected_type)

        # The parent lists the inner doc first, then its own artifacts.
        assert len(parent.children) == 3
        assert isinstance(parent.children[0], Doc)
        assert parent.children[0] == doc
        for offset, expected_type in enumerate(artifact_types, start=1):
            assert isinstance(parent.children[offset], expected_type)

        assert len(parent.artifacts) == 2
        for index, expected_type in enumerate(artifact_types):
            assert isinstance(parent.artifacts[index], expected_type)

        runner.run()

        # After running: the inner doc has gained a Doc child.
        assert len(doc.children) == 3
        for index, expected_type in enumerate(artifact_types + (Doc,)):
            assert isinstance(doc.children[index], expected_type)

        assert len(doc.artifacts) == 2

        assert len(parent.children) == 3
        assert len(parent.artifacts) == 2

        expected_registered = (
            "hello.txt|newdoc",
            "parent.txt|process",
            "newfile.txt|processtext",
        )
        for index, expected_key in enumerate(expected_registered):
            assert runner.registered_docs()[index].key == expected_key
def test_unprocessed_directory_archive_filter():
    """Archive the working directory with tgzdir and check both text files are in the tarball."""
    with temprun() as runner:
        fixtures = (
            ("abc.txt", 'this is abc'),
            ("def.txt", 'this is def'),
        )
        for filename, text in fixtures:
            with open(filename, "w") as handle:
                handle.write(text)

        doc = Doc(
            "archive.tgz|tgzdir",
            contents="ignore",
            tgzdir={'dir': '.'},
            runner=runner,
        )
        runner.docs = [doc]
        runner.run()
        runner.report()

        archive_path = "output/archive.tgz"
        assert os.path.exists(archive_path)

        with tarfile.open(archive_path, mode="r:gz") as archive:
            member_names = archive.getnames()
            for expected_member in ("./abc.txt", "./def.txt"):
                assert expected_member in member_names
def test_completed_children():
    """Run a three-level task tree and check the captured stdout and each task's completed_children."""
    with temprun() as runner:
        with divert_stdout() as stdout:
            grandchild = SubclassTask("grandchild", runner=runner)
            child = SubclassTask("child", grandchild, runner=runner)
            parent = SubclassTask("parent", child, runner=runner)

            runner.docs = [parent]
            runner.run()

            expected_output = "pre 'parent' pre 'child' pre 'grandchild' run 'grandchild' post 'grandchild' run 'child' post 'child' run 'parent' post 'parent'"
            assert stdout.getvalue() == expected_output

            expected_completed = (
                (parent, "SubclassTask:grandchild"),
                (parent, "SubclassTask:child"),
                (child, "SubclassTask:grandchild"),
            )
            for task, completed_key in expected_completed:
                assert completed_key in task.completed_children.keys()

            assert len(grandchild.completed_children) == 0
def test_split_html_additional_filters():
    """Run splithtml with additional_doc_filters and check the split pages were also filtered."""
    with temprun() as runner:
        html = """ <p>This is at the top.</p> <!-- split "a-page" --> some content on a page <!-- split "another-page" --> some content on another page <!-- endsplit --> bottom """
        filter_settings = {"additional_doc_filters": "processtext"}
        doc = Doc(
            "example.html|splithtml",
            contents=html,
            splithtml=filter_settings,
            runner=runner,
        )
        runner.docs = [doc]
        runner.run()

        # The additional filter alias is appended to each page's key.
        expected_keys = (
            (2, "a-page.html|processtext"),
            (3, "another-page.html|processtext"),
        )
        for index, expected_key in expected_keys:
            assert doc.children[index].key == expected_key

        od = doc.output().data()
        for fragment in (
            "<p>This is at the top.</p>",
            '<a href="a-page.html">',
            '<a href="another-page.html">',
            "bottom",
        ):
            assert fragment in od

        # Each page has header, body, footer and the processtext marker.
        page_bodies = (
            (2, "some content on a page"),
            (3, "some content on another page"),
        )
        for index, body in page_bodies:
            for fragment in (
                "<p>This is at the top.</p>",
                body,
                "bottom",
                "Dexy processed the text",
            ):
                assert fragment in doc.children[index].output().data()
def test_output_is_data():
    """Check that a run Doc's output() is a Data instance."""
    with temprun() as runner:
        doc = Doc("abc.txt", contents="these are the contents", runner=runner)
        runner.run_tasks(doc)

        result = doc.output()
        assert isinstance(result, Data)
def test_create_doc_with_child():
    """Check that a Doc built with a child Doc lists that child before its own artifact."""
    with temprun() as runner:
        child = Doc("child.txt", runner=runner)
        doc = Doc("parent.txt", child, runner=runner)

        assert doc.key == "parent.txt"
        for index, expected_key in enumerate(("child.txt", "parent.txt")):
            assert doc.children[index].key == expected_key
def test_create_virtual_initial_artifact():
    """Check that string contents yield an InitialVirtualArtifact with GenericData output."""
    with temprun() as runner:
        doc = Doc("abc.txt", contents="these are the contents", runner=runner)
        runner.run_tasks(doc)

        artifact_class_name = doc.children[0].__class__.__name__
        assert artifact_class_name == "InitialVirtualArtifact"

        data_class_name = doc.children[0].output_data.__class__.__name__
        assert data_class_name == "GenericData"
def test_blank_alias():
    """Construct a Doc whose key ends in an empty filter alias; passes if no exception is raised."""
    with temprun() as runner:
        key_with_blank_alias = "abc.txt|"
        Doc(key_with_blank_alias, runner=runner)
def test_pydoc_filter():
    """Run the pydoc filter on "dexy" and check an expected key appears in the output."""
    with temprun() as runner:
        doc = Doc("modules.txt|pydoc", contents="dexy", runner=runner)
        runner.docs = [doc]
        runner.run()

        output_keys = doc.output().keys()
        assert "dexy.artifact.Artifact.__class__:source" in output_keys
def test_key_value_example():
    """Run the keyvalueexample filter and check the output rendered as text."""
    with temprun() as runner:
        doc = Doc("hello.txt|keyvalueexample", contents="hello", runner=runner)
        runner.docs = [doc]
        runner.run()

        rendered = doc.output().as_text()
        assert rendered == "foo: bar"
def test_create_doc_with_filters():
    """Build and run a Doc with the outputabc filter; passes if no exception is raised."""
    with temprun() as runner:
        doc = Doc(
            "abc.txt|outputabc",
            contents="these are the contents",
            runner=runner,
        )
        runner.run_tasks(doc)
def test_run_incorrectly():
    """Call a SubclassTask instance directly and check captured stdout equals "run 'demo'"."""
    with divert_stdout() as stdout:
        with temprun() as runner:
            task = SubclassTask("demo", runner=runner)
            task()

            captured = stdout.getvalue()
            assert "run 'demo'" == captured
def test_setup_pattern_doc_one_filter():
    """Check that a PatternDoc key with one filter yields the pattern and a one-item alias list."""
    with temprun() as runner:
        pattern_doc = PatternDoc("*.txt|dexy", runner=runner)

        expected_pattern = "*.txt"
        expected_aliases = ['dexy']
        assert pattern_doc.file_pattern == expected_pattern
        assert pattern_doc.filter_aliases == expected_aliases
def test_tags_filter():
    """Run the tags filter with html/body tags and check the wrapped output."""
    with temprun() as runner:
        tag_settings = {"tags": ["html", "body"]}
        doc = Doc(
            "example.txt|tags",
            contents="<p>the text</p>",
            tags=tag_settings,
            runner=runner,
        )
        runner.docs = [doc]
        runner.run()

        wrapped = doc.output().data()
        assert wrapped == "<html><body>\n<p>the text</p>\n</body></html>"
def test_doc_run():
    """Check that a Doc is in runner.registered after doc.run() is called."""
    with temprun() as runner:
        doc = Doc("abc.txt|dexy|dexy", runner=runner)
        doc.run()

        registered = runner.registered
        assert doc in registered
def test_setup_pattern_doc_many_filters():
    """Check that a PatternDoc key with three filters yields the pattern and three aliases."""
    with temprun() as runner:
        pattern_doc = PatternDoc("*.txt|dexy|dexy|dexy", runner=runner)

        expected_pattern = "*.txt"
        expected_aliases = ['dexy', 'dexy', 'dexy']
        assert pattern_doc.file_pattern == expected_pattern
        assert pattern_doc.filter_aliases == expected_aliases
def test_key_value_example():
    """Check the text form of the keyvalueexample filter's output."""
    with temprun() as runner:
        example_doc = Doc(
            "hello.txt|keyvalueexample",
            contents="hello",
            runner=runner,
        )
        runner.docs = [example_doc]
        runner.run()

        expected_text = "foo: bar"
        assert example_doc.output().as_text() == expected_text
def test_setup_pattern_doc_no_filters():
    """Check that a PatternDoc key without filters yields the pattern and an empty alias list."""
    with temprun() as runner:
        pattern_doc = PatternDoc("*.txt", runner=runner)

        expected_pattern = "*.txt"
        expected_aliases = []
        assert pattern_doc.file_pattern == expected_pattern
        assert pattern_doc.filter_aliases == expected_aliases
def test_word_wrap_filter():
    """Run the wrap filter with width 5 and check the line-broken output."""
    with temprun() as runner:
        wrap_settings = {"width": 5}
        doc = Doc(
            "example.txt|wrap",
            contents="this is a line of text",
            wrap=wrap_settings,
            runner=runner,
        )
        runner.docs = [doc]
        runner.run()

        wrapped = doc.output().data()
        assert wrapped == "this\nis a\nline\nof\ntext"