def test_keep_going_after_error_no_more_process_to_run(self):
    """ After a failed run, running again with keep_going must not start
    any process when nothing is left to run """
    workflow = """file://B <- file://A
    echo A produces B > B
    echo about to fail
    error

file://C <- file://A
    sleep 1
    echo A produces C > C
    echo A have produced C

file://D <- file://B
    echo B produces D > D
    echo B have produced D
"""
    # First run without keep_going : the failing process makes tuttle exit with 2
    rcode1, output1 = run_tuttle_file(workflow, nb_workers=2)
    assert rcode1 == 2, output1

    # Second run with keep_going : still in error, but nothing new is run
    rcode, output = run_tuttle_file(workflow, nb_workers=2, keep_going=True)
    assert rcode == 2, output1 + "\n" + output
    assert "* file://B" not in output, output
    assert "Nothing to do" in output, output
def test_abort_if_lost_exceeds_threshold(self):
    """ Should display a message and abort if the processing time lost by
    invalidation is above the threshold """
    # NOTE(review): another test_abort_if_lost_exceeds_threshold (sleeping 2s)
    # exists in this file; if both live in the same class only the later
    # definition is collected - confirm.
    initial = """file://B <- file://A
    echo A produces B
    echo B > B

file://C <- file://B
    echo B produces C
    python -c "import time; time.sleep(1.3)"
    echo C > C
"""
    rcode, output = run_tuttle_file(initial)
    assert rcode == 0, output
    assert isfile('C')

    # Changing the process that creates B invalidates C, which took more
    # than the 1 second threshold to produce
    modified = """file://B <- file://A
    echo B has changed
    echo B has changed > B

file://C <- file://B
    echo B produces C
    python -c "import time; time.sleep(1.3)"
    echo C > C
"""
    rcode, output = run_tuttle_file(modified, threshold=1)
    assert rcode == 2, output
    assert "Aborting" in output, output
def test_removing_an_output_invalidates_process(self):
    """ Removing an output from a process that has succeeded should
    invalidate the whole process, thus invalidating all its resources """
    two_outputs = """file://B file://C <- file://A
    echo A produces B
    echo A produces B > B
    echo A produces C
    echo A produces C > C
"""
    rcode, output = run_tuttle_file(two_outputs)
    print(output)
    assert rcode == 0, output

    # Same code, but file://C is not declared as an output anymore
    one_output = """file://B <- file://A
    echo A produces B
    echo A produces B > B
    echo A produces C
    echo A produces C > C
"""
    rcode, output = run_tuttle_file(one_output)
    assert rcode == 0, output
    for expected in ("file://B", "file://C", "A produces B", "A produces C"):
        assert expected in output, output
def test_changes_in_a_process_invalidates_depending_failing_process_on_invalidate(
        self):
    """ If a process failed, changing a process it depends on should
    invalidate it before running (from bug) """
    failing = """
file://B <- file://A
    echo A produces B > B

file://C <- file://B
    echo B produces invalid C
    echo B produces invalid C > C
    error
"""
    rcode, output = run_tuttle_file(failing)
    print(output)
    assert rcode == 2, output

    # Only the process creating B changes; the failing process is untouched
    upstream_changed = """file://B <- file://A
    echo A produces another B > B

file://C <- file://B
    echo B produces invalid C
    echo B produces invalid C > C
    error
"""
    rcode, output = run_tuttle_file(upstream_changed)
    assert rcode == 2, output
    assert "* file://B" in output, output
    assert "* file://C" in output, output
def test_not_abort_if_threshold_is_0(self):
    """ With a threshold of 0, tuttle should abort whatever the lost time is """
    # NOTE(review): the method name says "not abort" whereas the assertions
    # below expect exit code 2 and an "Aborting" message - confirm the name.
    initial = """file://B <- file://A
    echo A produces B
    echo B > B

file://C <- file://B
    echo B produces C
    echo C > C
"""
    rcode, output = run_tuttle_file(initial)
    assert rcode == 0, output
    assert isfile('B')
    assert isfile('C')

    modified = """file://B <- file://A
    echo B has changed
    echo B has changed > B

file://C <- file://B
    echo B produces C
    echo C > C
"""
    rcode, output = run_tuttle_file(modified, threshold=0)
    assert rcode == 2, output
    assert "Aborting" in output, output
    # Nothing must have been removed since the run was aborted
    assert isfile('B')
    assert isfile('C')
def test_abort_if_lost_exceeds_threshold(self):
    """ Should display a message and abort if the processing time lost by
    invalidation is above the threshold """
    # NOTE(review): another test_abort_if_lost_exceeds_threshold (sleeping
    # 1.3s) exists in this file; if both live in the same class only the
    # later definition is collected - confirm.
    initial = """file://B <- file://A
    echo A produces B
    echo B > B

file://C <- file://B
    echo B produces C
    python -c "import time; time.sleep(2)"
    echo C > C
"""
    rcode, output = run_tuttle_file(initial)
    assert rcode == 0, output
    assert isfile('C')

    # Changing the process that creates B invalidates C, which took more
    # than the 1 second threshold to produce
    modified = """file://B <- file://A
    echo B has changed
    echo B has changed > B

file://C <- file://B
    echo B produces C
    python -c "import time; time.sleep(2)"
    echo C > C
"""
    rcode, output = run_tuttle_file(modified, threshold=1)
    assert rcode == 2, output
    assert "Aborting" in output, output
def test_remove_resource(self):
    """ If a resource is removed from a tuttlefile, it should be invalidated """
    three_resources = """file://B <- file://A
    echo A produces B
    echo B > B

file://C <- file://B
    echo B produces C
    echo C > C

file://D <- file://A
    echo A produces D
    echo D > D
"""
    rcode, output = run_tuttle_file(three_resources)
    for name in ('B', 'C', 'D'):
        assert path.exists(name)

    # B disappears from the workflow and C is now built directly from A
    without_b = """file://C <- file://A
    echo A produces C
    echo C > C

file://D <- file://A
    echo A produces D
    echo D > D
"""
    rcode, output = run_tuttle_file(without_b)
    assert rcode == 0
    assert "* file://B" in output, output
    assert "* file://C" in output, output
    # D is unchanged : it must not be listed as invalidated
    assert "* file://D" not in output, output
def test_should_run_after_invalidation(self):
    """ After a process has changed, the same run should both report the
    resources and execute the processes again """
    original = """file://B <- file://A
    echo A produces B
    echo A produces B > B

file://C <- file://B
    echo B produces C
    echo B produces C > C
"""
    rcode, output = run_tuttle_file(original)
    assert rcode == 0, output

    changed = """file://B <- file://A
    echo A produces another B
    echo A produces B > B

file://C <- file://B
    echo B produces C
    echo B produces C > C
"""
    rcode, output = run_tuttle_file(changed)
    assert rcode == 0, output
    for expected in ("file://B", "file://C",
                     "A produces another B", "B produces C"):
        assert expected in output, output
def test_remove_primary(self):
    """ Removing the first process, which turns a resource into a primary
    resource, should be considered as processing : the report and the
    saved workflow must not mention the former primary resource anymore """
    first = """file://B <- file://A
    echo A produces another B
    echo A produces B > B

file://C <- file://B
    echo B produces C
    echo B produces C > C
"""
    rcode, output = run_tuttle_file(first)
    assert rcode == 0, output

    # file://B becomes a primary resource, file://A is not used anymore
    second = """file://C <- file://B
    echo B produces C
    echo B produces C > C
"""
    rcode, output = run_tuttle_file(second)
    assert rcode == 0, output
    assert output.find("Report has been updated") >= 0, output

    with open(join('.tuttle', 'report.html')) as report_file:
        report = report_file.read()
    assert report.find('file://A') == -1, report

    # The original code read the dump but asserted on the report a second
    # time; the saved workflow itself is what must be checked here.
    # A pickle is binary data, hence the 'rb' mode and the bytes literal.
    with open(join('.tuttle', 'last_workflow.pickle'), 'rb') as dump_file:
        dump = dump_file.read()
    assert dump.find(b'file://A') == -1, repr(dump)
def test_processes_paths(self):
    """ After a process has run, former logs and reserved_path should have
    moved according to the new name of the process """
    project = """file://B <- file://A
    echo A produces B > B
    echo A has produced B
"""
    rcode, output = run_tuttle_file(project)
    assert rcode == 0, output
    # The process is declared on line 1 of the tuttlefile
    stdout_path = TuttleDirectories.tuttle_dir("processes", "logs", "tuttlefile_1_stdout.txt")
    with open(stdout_path) as log_file:
        out_log = log_file.read()
    assert out_log.find("A has produced B") > -1, out_log
    assert exists(TuttleDirectories.tuttle_dir("processes", "tuttlefile_1"))

    # A new process is inserted before : the unchanged process that creates
    # file://B is now declared on line 6 of the tuttlefile
    project = """file://C <- file://A ! python
    f = open('C', 'w')
    f.write('A produces C')
    print('echo A has produced C')

file://B <- file://A
    echo A produces B > B
    echo A has produced B
"""
    rcode, output = run_tuttle_file(project)
    assert rcode == 0, output
    stdout_path = TuttleDirectories.tuttle_dir("processes", "logs", "tuttlefile_6_stdout.txt")
    with open(stdout_path) as log_file:
        out_log = log_file.read()
    assert out_log.find("A has produced B") > -1, out_log
    reserved_path = TuttleDirectories.tuttle_dir("processes", "tuttlefile_6")
    assert exists(reserved_path)
def test_change_a_resource(self):
    """ If a resource (not primary) has changed outside tuttle, it should be
    invalidated when checking integrity """
    workflow = """file://C file://B <- file://A
    echo A produces B
    echo A produces B > B
    echo A produces C
    echo A produces C > C

file://D <- file://C
    echo C produces D
    echo C produces D > D
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output
    assert path.exists('B')
    assert path.exists('C')

    # Alter B behind tuttle's back
    with open('B', 'w') as altered:
        altered.write('B has changed')

    rcode, output = run_tuttle_file(workflow, check_integrity=True)
    assert rcode == 0
    expected_fragments = (
        "file://B", "file://C", "file://D",
        "A produces B", "A produces C", "C produces D",
    )
    for fragment in expected_fragments:
        assert fragment in output, output
def test_rerun_outputless_process_if_code_changed(self):
    """ An outputless process should not re-run if it hasn't changed """
    # NOTE(review): the method name talks about re-running after a code
    # change, but the body runs the very same project twice and expects
    # "Nothing to do" - confirm the name.
    unchanged = """
<- file://A
    echo Action after A is created
"""
    rcode, output = run_tuttle_file(unchanged)
    assert rcode == 0, output

    rcode, output = run_tuttle_file(unchanged)
    assert rcode == 0
    assert "Nothing to do" in output, output
def test_keep_going_after_error_open(self):
    """ If a workflow fails, running it again with option keep_going
    should run all it can """
    # The order matters
    workflow = """
file://B <- file://A
    echo A produces B > B
    echo A have produced B

file://C <- file://A
    echo A won't produce C
    echo A won't produce C > C
    echo about to fail
    error

file://D <- file://A
    echo A produces D > D
    echo A have produced D

file://E <- file://A
    echo A produces E > E
    echo A have produced E

file://F <- file://A
    echo A produces F > F
    echo A have produced F

file://G <- file://A
    echo A produces G > G
    echo A have produced G

file://H <- file://A
    echo A produces H > H
    echo A have produced H
"""
    rcode1, output1 = run_tuttle_file(workflow, nb_workers=1)
    assert rcode1 == 2, output1

    # Hope that tuttle has not run the failing process last.
    # Number of pieces obtained when cutting the output on the marker,
    # i.e. number of occurrences of the marker + 1
    nb_splits = output1.count("A have produced") + 1
    # We can't control the order in which tuttle runs the processes,
    # but we can check that the order is suitable for this test
    if nb_splits >= 7:
        raise SkipTest("Damned ! The tests won't be accurate because tuttle choose to run the "
                       "failing process last \n" + str(nb_splits) + "\n" + output1)

    rcode, output = run_tuttle_file(workflow, nb_workers=1, keep_going=True)
    assert rcode == 2, output1 + "\n" + output
    assert "* file://C" not in output, output
    assert "A have produced" in output, output
def test_should_tell_if_already_ok(self):
    """ If nothing has to run, the user should be informed everything is ok """
    workflow = """file://B <- file://A
    echo A produces B > B
    echo A produces B
"""
    # First run actually executes the process
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output
    assert "A produces B" in output, output

    # Second run of the same workflow has nothing left to do
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output
    assert "Nothing to do" in output, output
def test_dont_invalidate_if_errors(self):
    """ A change in a process without output should re-run the process
    (from bug) """
    # NOTE(review): the method name mentions errors, but the body only
    # changes the code of an outputless process and expects it to run
    # again - confirm the name.
    before = """
<- file://A
    echo Action after A is created
"""
    rcode, output = run_tuttle_file(before)
    assert rcode == 0, output

    after = """
<- file://A
    echo Another action after A is created
"""
    rcode, output = run_tuttle_file(after)
    assert rcode == 0
    assert "Another" in output, output
def test_code_changes(self):
    """ A resource should be invalidated if the code that creates it changes """
    original_code = """file://B <- file://A
    echo A creates B > B
"""
    rcode, output = run_tuttle_file(original_code)
    assert isfile('B')

    modified_code = """file://B <- file://A
    echo A creates B in another way> B
"""
    rcode, output = run_tuttle_file(modified_code)
    assert rcode == 0
    assert "* file://B" in output, output
    # The reason given for the invalidation must be the change of the process
    assert PROCESS_HAS_CHANGED in output, output
def test_no_error_with_download_process(self):
    """ Download process does not create code in reserved_path for the
    process... Thus it can't be moved when retrieving logs and reserved
    path from previous execution (from bug) """
    workflow = """file://g <- http://localhost:8043/a_resource ! download

file://h <- file://g
    ERROR
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 2, output

    rcode, output = tuttle_invalidate()
    assert rcode == 0, output

    # Run again without giving a project
    rcode, output = run_tuttle_file()
    assert rcode == 2, output
def test_preprocess_should_not_force_invalidation(self):
    """ The existence of preprocesses should not invalidate all the
    resources (from bug) """
    workflow = """file://B <- file://A
    echo A produces B > B

|<<
    echo Running preprocess
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output

    # With a threshold of 0, the second run is expected to succeed and
    # report that nothing has to be done
    rcode, output = run_tuttle_file(workflow, threshold=0)
    assert rcode == 0, output
    assert "Nothing to do" in output, output
def test_pre_check_before_invalidation(self):
    """ Pre check should happen before invalidation """
    project1 = """file://A <-
    echo A > A
"""
    rcode, output = run_tuttle_file(project1)
    assert isfile('A')

    # The process creating A changes AND a download process with an input
    # the test expects to be rejected is added
    project2 = """file://A <-
    echo different > A

file://google.html <- file://A ! download
"""
    rcode, output = run_tuttle_file(project2)
    assert rcode == 2, output
    # The original assertion looked for "* file://B", a resource that does
    # not exist in this project, so it could never fail. The resource that
    # must not have been invalidated before the pre-check error is file://A.
    assert output.find("* file://A") == -1, output
    assert output.find("Download processor") >= 0, output
def test_relative_resource_is_attached_to_tuttlefile(self):
    """ If you move a whole project, it must still work : relative
    resources are resolved from the location of the tuttlefile """
    # Local import : only needed to clean up the temporary copy
    from shutil import rmtree

    project = """file://B <- file://A
    echo A produces B >B
    echo A produces B
"""
    run_tuttle_file(project)
    assert isfile('B')

    tmp_dir = mkdtemp()
    try:
        copycontent('.', tmp_dir)
        assert isfile(join(tmp_dir, 'B'))
        # Invalidate file://B from inside the copied project
        with CurrentDir(tmp_dir):
            invalidate(join(tmp_dir, 'tuttlefile'), ['file://B'])
        assert isfile('B'), "File B in the origin project should still exist"
        assert not isfile(join(tmp_dir, 'B')), "File B in the copied project should have been removed"
    finally:
        # The original test left the temporary directory behind
        rmtree(tmp_dir, ignore_errors=True)
def test_modified_primary_resource_should_invalidate_dependencies(self):
    """ If a primary resource is modified, it should invalidate dependencies """
    workflow = """file://B <- file://A
    echo A produces B
    echo B > B
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output

    # Modify the primary resource between the two runs
    with open('A', 'w') as primary:
        primary.write('A has changed')

    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output
    # A itself is not listed, but B is and its process has run again
    assert '* file://A' not in output, output
    assert '* file://B' in output, output
    assert 'A produces B' in output, output
def test_resource_dependency_change(self):
    """ A resource should be invalidated if it does not depend on the same
    resources anymore """
    two_inputs = """file://C <- file://A, file://B
    echo A creates C > C
"""
    rcode, output = run_tuttle_file(two_inputs)
    assert isfile('B')

    # file://B is not an input of the process anymore
    one_input = """file://C <- file://A
    echo A creates C > C
"""
    rcode, output = run_tuttle_file(one_input)
    assert rcode == 0
    assert "* file://C" in output, output
    assert NOT_SAME_INPUTS in output, output
def test_dont_mess_up_with_outputless_process(self):
    """ A successful outputless process must not run again, even if some
    other process has the same input (from bug) """
    workflow = """file://C <- file://A
    echo A produces C > C

<- file://A
    echo Action from A
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output

    # Same workflow again : neither process should be executed
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0
    assert "Nothing to do" in output, output
    assert "Action" not in output, output
def test_duration(self):
    """ Invalidating a resource should tell how much processing time will
    be lost, and remove the resource and the ones depending on it """
    workflow = """file://B <- file://A
    echo A creates B
    python -c "import time; time.sleep(1)"
    echo A creates B > B

file://C <- file://B
    echo A creates C
    python -c "import time; time.sleep(1.2)"
    echo A creates C > C
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output
    assert isfile('B')
    assert isfile('C')

    rcode, output = tuttle_invalidate(urls=['file://B'])
    assert rcode == 0, output
    assert '* file://B' in output, output
    assert '* file://C' in output, output
    assert 's of processing will be lost' in output, output
    # No line of the output may start with a duration of 0 seconds
    assert '\n0s' not in output, output
    assert not isfile('B'), output
    assert not isfile('C'), output
def test_sqlite_processor(self):
    """ A project with a csv2sqlite processor should load the csv file into
    the table : every expected row must be there, in order, and no other """
    project = """sqlite://db.sqlite/pop <- file://test.csv ! csv2sqlite
"""
    rcode, output = run_tuttle_file(project)
    assert rcode == 0, output

    expected_lines = [
        u"Aruba,ABW,102911",
        u"Andorra,AND,79218",
        u"Afghanistan,AFG,30551674",
        u"Angola,AGO,21471618",
        u"Albania,ALB,2897366",
        u"Arab World,ARB,369762523",
        u"United Arab Emirates,ARE,9346129",
    ]
    # "with connection" only handles the transaction, it does not close the
    # connection : close it explicitly so the db file is released
    db = sqlite3.connect('db.sqlite')
    try:
        cur = db.cursor()
        cur.execute("SELECT * FROM pop")
        for line in expected_lines:
            a_result = cur.fetchone()
            assert a_result == tuple(line.split(',')), a_result
        # The original check wrapped "assert False" in a bare except, which
        # swallowed the AssertionError : an extra row could never be
        # detected. fetchone() returns None when the result set is exhausted.
        extra_row = cur.fetchone()
        assert extra_row is None, "Detected an extra line on the table"
    finally:
        db.close()
def test_workflow_must_run_after_invalidation_because_of_an_error(self):
    """ If a process fails, it can be invalidated then run again (from bug) """
    workflow = """file://B <- file://A
    echo A produces B
    echo A produces B > B

file://C <- file://B
    ERROR
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 2, output

    rcode, output = tuttle_invalidate()
    assert rcode == 0, output

    rcode, output = run_tuttle_file(workflow)
    # If we get here, it means no exception was raised
    assert rcode == 2, output
def test_invalidation_should_resist_remove_errors(self):
    """ If removing a resource raises an error, tuttle should display a
    warning """
    original = """http://www.google.com <- file://A
    echo As if I could publish to the main page of google...
"""
    rcode, output = run_tuttle_file(original)
    assert rcode == 0, output

    # Changing the process requires removing a resource tuttle can't remove
    changed = """http://www.google.com <- file://A
    echo process changed
"""
    rcode, output = run_tuttle_file(changed)
    assert rcode == 0, output
    for expected in ("http://www.google.com", "Warning",
                     "should not be considered valid"):
        assert expected in output, output
def test_threshold_in_command_line_run(self):
    """ The threshold -t parameter should be available from the command line """
    initial = """file://B <- file://A
    echo A produces B
    python -c "import time; time.sleep(1)"
    echo B > B

file://C <- file://B
    echo B produces C
    echo C > C
"""
    rcode, output = run_tuttle_file(initial)
    assert rcode == 0, output
    assert isfile('B')

    modified = """file://B <- file://A
    echo B has changed
    echo B has changed > B
"""
    with open('tuttlefile', "w") as tuttlefile:
        tuttlefile.write(modified)

    # Run the real command line with a threshold of 1 second
    tuttle_cmd = Popen(['tuttle', 'run', '-t', '1'], stdout=PIPE)
    output, _ = tuttle_cmd.communicate()
    rcode = tuttle_cmd.returncode
    assert rcode == 2, output
    assert 'Aborting' in output, output
    # The run was aborted : B must still be there
    assert isfile('B'), output
def test_keep_going(self):
    """ If tuttle is run with option keep_going, it should run all it can
    and not stop at first error """
    # As in Gnu Make
    # Local import : used to match a process id exactly
    import re

    project = """file://B <- file://A
    Obvious error

file://C <- file://B
    echo B produces C > C

file://D <- file://A
    echo A produces D
    echo A produces D > D

file://E <- file://A
    Another error
"""
    rcode, output = run_tuttle_file(project, nb_workers=1, keep_going=True)
    assert rcode == 2, output
    assert output.find("::stderr") >= 0, output
    # Both failing processes must have been run
    assert output.find("Obvious") >= 0, output
    assert output.find("Another") >= 0, output
    assert output.find("Process ended with error code 1") >= 0, output
    pos_have_failed = output.find("have failed")
    assert pos_have_failed >= 0, output
    # Failing processes are declared on lines 1 and 11 of the tuttlefile.
    # "tuttlefile_1" is a prefix of "tuttlefile_11" : a plain substring
    # search would succeed with only the second one listed, so the id must
    # not be followed by another digit.
    first_failure = re.compile(r"tuttlefile_1(?!\d)")
    assert first_failure.search(output, pos_have_failed), output
    assert output.find("tuttlefile_11", pos_have_failed) >= 0, output
def test_tuttlefile_should_be_in_utf8(self):
    """ After a run, file B read as utf8 should contain the accented text """
    # No project is given to run_tuttle_file here
    # NOTE(review): presumably a utf8 tuttlefile is provided by the test
    # fixture - confirm.
    rcode, output = run_tuttle_file()
    assert rcode == 0, output
    # Read raw bytes and decode explicitly; the file is closed right after
    # (the original relied on the Python-2-only file() builtin and never
    # closed the handle)
    with open('B', 'rb') as produced:
        result = produced.read().decode('utf8')
    assert result.find(u"du texte accentué") >= 0, result
def test_error_in_process(self):
    """ When a process fails, tuttle should exit with status code 2 """
    # As in Gnu Make
    workflow = """file://B <- file://A
    echo A produces B
    echo B > B

file://C <- file://B
    Obvious syntax error
    echo This should not be written
    echo C > C

file://D <- file://A
    echo A produces D
    echo D > D
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 2
    assert "::stderr" in output, output
    assert "Obvious" in output, output
    assert "Process ended with error code 1" in output, output

    # The failing process (declared on line 5) must be listed after the
    # "have failed" message
    failure_summary_at = output.find("have failed")
    assert failure_summary_at >= 0, output
    assert "tuttlefile_5" in output[failure_summary_at:], output
def test_bad_csv_should_fail_with_csv_2sqlite(self):
    """ A csv without the right number of columns in one row should make
    the process fail """
    workflow = """sqlite://db.sqlite/pop <- file://bad_csv.csv ! csv2sqlite
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 2, output
    assert "Wrong number of columns on line 4" in output
def test_changes_in_the_graph_without_removing_resource(self):
    """ If the graph changes without removing any resource, tuttle should
    display a message, even if the removed resource is used elsewhere
    (from bug) """
    full_graph = """
<- file://A
    echo Action after A is created.

file://B <- file://A
    echo B > B

file://C <- file://B
    echo C > C
"""
    rcode, output = run_tuttle_file(full_graph)
    print(output)
    assert rcode == 0, output

    # The process creating B is removed, B is still used as an input of C
    reduced_graph = """
<- file://A
    echo Action after A is created.

file://C <- file://B
    echo C > C
"""
    rcode, output = tuttle_invalidate(project=reduced_graph)
    assert rcode == 0, output
    assert "Report has been updated to reflect" in output, output
def test_a_failing_process_without_output_should_be_invalidated(self):
    """ When a process without outputs fails, tuttle should exit with
    status code 2 and report a failure; after invalidation the report
    should not show the failure anymore """
    project = """file://B <- file://A
    echo A produces B
    echo B > B

<- file://B
    error
    echo This should not be written
    echo C > C
"""
    rcode, output = run_tuttle_file(project)
    assert rcode == 2
    assert isfile('B')
    assert not isfile('C')

    report_path = join('.tuttle', 'report.html')
    assert isfile(report_path)
    # Files are read inside a with block so they are closed before tuttle
    # rewrites the report
    with open(report_path) as report_file:
        report = report_file.read()
    title_match_failure = search(r'<h1>.*Failure.*</h1>', report, DOTALL)
    assert title_match_failure, report

    rcode, output = tuttle_invalidate()
    assert rcode == 0
    with open(report_path) as report_file:
        report = report_file.read()
    title_match_failure = search(r'<h1>.*Failure.*</h1>', report, DOTALL)
    # The message is only evaluated when the assertion fails, i.e. when
    # there is a match to show
    assert not title_match_failure, title_match_failure.group()
def test_binary_file_should_fail_with_csv_2sqlite(self):
    """ A binary file that is not a csv should make the process fail """
    workflow = """sqlite://db.sqlite/pop <- file://tests.sqlite ! csv2sqlite
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 2, output
    assert "Is this file a valid CSV file ?" in output, output
def test_encoding(self):
    """ csv2sqlite should guess the encoding """
    workflow = """sqlite://db.sqlite/test <- file://utf8.csv ! csv2sqlite
"""
    rcode, output = run_tuttle_file(workflow)
    # NOTE(review): the original comment said the run "should fail due to
    # encoding error", yet success is what is asserted - presumably the run
    # would fail if the encoding was not guessed; confirm.
    assert rcode == 0, output
def test_not_modified_new_primary_resources_should_not_invalidate_dependencies(
        self):
    """ If a resource produced by the previous workflow has become a primary
    resource and its signature has not changed, it shouldn't invalidate
    its dependencies """
    with_b_process = """file://B <- file://A
    echo A produces B
    echo A produces B > B

file://C <- file://B
    echo B produces C
    echo B produces C > C
"""
    rcode, output = run_tuttle_file(with_b_process)
    print(output)
    assert rcode == 0, output

    # The process creating B is gone : B is now a primary resource
    b_is_primary = """
file://C <- file://B
    echo B produces C
    echo B produces C > C
"""
    rcode, output = tuttle_invalidate(project=b_is_primary)
    assert rcode == 0, output
    assert "Report has been updated to reflect" in output, output
def test_cant_connect(self):
    """ Should display a message if tuttle can't connect to the database """
    workflow = """pg://localhost:5432/this_db_does_not_exists/table <- ! postgresql
    CREATE TABLE new_table AS SELECT * FROM test_table;
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 2, output
    assert "Can't connect" in output, output
def test_odbc_processor_with_several_instuctions(self):
    """ An ODBC process can have several SQL instructions and create
    several tables """
    workflow = """odbc://tuttle_test_db/new_table, odbc://tuttle_test_db/another_table <- odbc://tuttle_test_db/test_table ! odbc
    CREATE TABLE new_table AS SELECT * FROM test_table;
    CREATE TABLE another_table (id int, col1 varchar);
"""
    exit_code, output = run_tuttle_file(workflow)
    assert exit_code == 0, output
def test_processor_has_been_fixed(self):
    """ Changing the processor of a process should invalidate dependencies """
    # Python code, but no processor is declared : the run is expected to fail
    without_processor = """file://B <- file://A
    print("some python code")
    open('A', 'w').write('A')
"""
    rcode, output = run_tuttle_file(without_processor)
    assert rcode == 2, output

    # Same kind of code, this time with the python processor
    with_python_processor = """file://B <- file://A ! python
    print("some python code")
    open('B', 'w').write('A produces B')
"""
    rcode, output = run_tuttle_file(with_python_processor)
    assert rcode == 0, output
    assert "file://B" in output, output
    assert isfile('B')
def test_postgresql_processor_with_several_instuctions(self):
    """ A PostgreSQL process can have several SQL instructions """
    workflow = """pg://localhost:5432/tuttle_test_db/new_table, pg://localhost:5432/tuttle_test_db/another_table <- pg://localhost:5432/tuttle_test_db/test_table ! postgresql
    CREATE TABLE new_table AS SELECT * FROM test_table;
    CREATE TABLE another_table (id int, col1 varchar);
"""
    exit_code, output = run_tuttle_file(workflow)
    assert exit_code == 0, output
def test_postgresql_processor(self):
    """ A project with a PostgreSQL processor should run the sql statements """
    workflow = """pg://localhost:5432/tuttle_test_db/new_table <- pg://localhost:5432/tuttle_test_db/test_table ! postgresql
    CREATE TABLE new_table AS SELECT * FROM test_table;
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output
    # The statement itself must appear in the output of the run
    statement = "CREATE TABLE new_table AS SELECT * FROM test_table"
    assert statement in output, \
        "PostgresqlProcessor should log the SQL statements"
def test_odbc_processor(self):
    """ A project with an ODBC processor should run the sql statements """
    workflow = """odbc://tuttle_test_db/new_table <- odbc://tuttle_test_db/test_table ! odbc
    CREATE TABLE new_table AS SELECT * FROM test_table;
"""
    rcode, output = run_tuttle_file(workflow)
    assert rcode == 0, output
    # The statement itself must appear in the output of the run
    statement = "CREATE TABLE new_table AS SELECT * FROM test_table"
    assert statement in output, \
        "ODBCProcessor should log the SQL statements"