def test_transform_with_missing_formatter_args(self):
    s = StringIO()
    pipeline_printout(s, [transform_with_missing_formatter_args],
                      verbose=4, wrap_width=10000, pipeline="main")
    self.assertIn("Unmatched field {dynamic_message}", s.getvalue())
    pipeline_run([transform_with_missing_formatter_args], verbose=0, pipeline="main")

def test_output_up_to_date_func_changed(self):
    """Input file exists, output up to date, function body changed"""
    # output is up to date, but the function body changed (e.g., source differs)
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([transform1], verbose=0,
                 checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
    # simulate a source change by swapping the two functions' code objects
    if sys.hexversion >= 0x03000000:
        split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
    else:
        split1.func_code, transform1.func_code = transform1.func_code, split1.func_code
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [transform1], verbose=6,
                          checksum_level=chksm, pipeline="main")
        if chksm >= CHECKSUM_FUNCTIONS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('Pipeline function has changed', s.getvalue())
        else:
            #self.assertIn('Job up-to-date', s.getvalue())
            pass
    # clean up our function-changing mess!
    if sys.hexversion >= 0x03000000:
        split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
    else:
        split1.func_code, transform1.func_code = transform1.func_code, split1.func_code

def test_raises_error(self):
    """run a function that fails but creates output, then check what should run"""
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    time.sleep(.5)
    del runtime_data[:]
    # NB: arguably this should be a plain RuntimeError rather than RethrownJobError
    with self.assertRaises(RethrownJobError):
        # generates output, then fails
        pipeline_run([transform_raise_error], verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [transform_raise_error], verbose=6,
                          checksum_level=chksm, pipeline="main")
        if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('left over from a failed run?', s.getvalue())
        else:
            #self.assertIn('Job up-to-date', s.getvalue())
            pass

def test_mkdir_run(self):
    """Run mkdir"""
    cleanup_tmpdir()
    pipeline_run([test_transform, test_transform2], verbose=6, multiprocess=2)

def test_combinations3_run(self):
    """Run combinations"""
    cleanup_tmpdir()
    pipeline_run([test_combinations3_merged_task], verbose=0,
                 multiprocess=100, one_second_per_job=one_second_per_job)
    with open(workdir + "/merged.results") as oo:
        self.assertEqual(oo.read(), "abc,abd,acd,bcd,")

def test_newstyle_mkdir_run(self):
    test_pipeline = Pipeline("test")

    test_pipeline.split(task_func=generate_initial_files1,
                        input=1,
                        output=[tempdir + "/" + prefix + "_name.tmp1"
                                for prefix in "abcd"])

    test_pipeline.transform(task_func=test_transform,
                            input=generate_initial_files1,
                            filter=formatter(),
                            output="{path[0]}/{basename[0]}.dir/{basename[0]}.tmp2")\
        .mkdir(tempdir + "/test1")\
        .mkdir(tempdir + "/test2")\
        .mkdir(generate_initial_files1, formatter(),
               ["{path[0]}/{basename[0]}.dir", 3, "{path[0]}/{basename[0]}.dir2"])

    test_pipeline.mkdir(test_transform2, tempdir + "/test3")\
        .mkdir(generate_initial_files1, formatter(),
               "{path[0]}/{basename[0]}.dir2")
    cleanup_tmpdir()
    pipeline_run([test_transform, test_transform2], verbose=0,
                 multiprocess=2, pipeline="main")

def test_job_history_with_exceptions_run(self):
    """Run"""
    for i in range(1):
        cleanup_tmpdir()
        try:
            pipeline_run([test_task4], verbose=0,
                         #multithread = 2,
                         one_second_per_job=one_second_per_job, pipeline="main")
        except:
            pass
        s = StringIO()
        pipeline_printout(s, [test_task4], verbose=VERBOSITY,
                          wrap_width=10000, pipeline="main")
        #
        # task 2 should be up to date because an exception was thrown in task 3
        #
        pipeline_printout_str = s.getvalue()
        correct_order = not re.search(
            'Tasks which will be run:.*\n(.*\n)*Task = test_task2',
            pipeline_printout_str)
        if not correct_order:
            print(pipeline_printout_str)
        self.assertTrue(correct_order)
        sys.stderr.write(".")
    print()

def test_combinations_with_replacement3_run(self):
    """Run combinations_with_replacement"""
    cleanup_tmpdir()
    pipeline_run([test_combinations_with_replacement3_merged_task], verbose=0,
                 multiprocess=100, one_second_per_job=one_second_per_job)
    with open(workdir + "/merged.results") as oo:
        self.assertEqual(oo.read(),
                         'aaa,aab,aac,aad,abb,abc,abd,acc,acd,add,bbb,bbc,bbd,'
                         'bcc,bcd,bdd,ccc,ccd,cdd,ddd,')

def test_merge_output(self):
    """test multiple-input checksums"""
    # one output incorrectly generated
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([split1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
    job_history = dbdict.open(get_default_history_file_name(), picklevalues=True)
    del job_history[os.path.relpath(split1_outputs[0])]
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [merge2], verbose=5, checksum_level=chksm)
        if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('Previous incomplete run leftover', s.getvalue())
        else:
            self.assertIn('Job up-to-date', s.getvalue())

    # make sure the jobs run fine
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([merge2], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [merge2], verbose=5, checksum_level=chksm)
        self.assertIn('Job up-to-date', s.getvalue())
        self.assertNotIn('Job needs update:', s.getvalue())
        self.assertNotIn('Previous incomplete run leftover', s.getvalue())

def test_no_re_match(self):
    try:
        pipeline_run(multiprocess=10, verbose=0, pipeline="main")
    except:
        return
    raise Exception(
        "Inputs(...) with multiple arguments should have thrown an exception")

def ruffus_main(options, args):
    'Main entry point for ruffus pipelines'
    if options.just_print:
        pipeline_printout(sys.stdout,
                          options.target_tasks,
                          options.forced_tasks,
                          verbose=options.verbose)
    elif options.flowchart:
        pipeline_printout_graph(open(options.flowchart, "w"),
                                # use the flowchart file name extension to decide
                                # the flowchart format, e.g. svg, jpg etc.
                                os.path.splitext(options.flowchart)[1][1:],
                                options.target_tasks,
                                options.forced_tasks,
                                no_key_legend=not options.key_legend_in_graph)
    else:
        pipeline_run(options.target_tasks,
                     options.forced_tasks,
                     multiprocess=options.jobs,
                     logger=main_logger,
                     verbose=options.verbose,
                     touch_files_only=options.touch_only)

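# A minimal sketch of how ruffus_main might be wired to a command line.
# The option names below (just_print, flowchart, jobs, touch_only, ...) simply
# mirror the attributes ruffus_main reads above; the original project's actual
# parser may differ, so treat this as an assumption rather than its real CLI.
def sketch_ruffus_cli():
    import optparse
    parser = optparse.OptionParser()
    parser.add_option("-n", "--just_print", action="store_true", default=False)
    parser.add_option("--flowchart", default=None)
    parser.add_option("--key_legend_in_graph", action="store_true", default=False)
    parser.add_option("-v", "--verbose", type="int", default=1)
    parser.add_option("-j", "--jobs", type="int", default=1)
    parser.add_option("--touch_only", action="store_true", default=False)
    parser.add_option("--target_tasks", action="append", default=[])
    parser.add_option("--forced_tasks", action="append", default=[])
    options, args = parser.parse_args()
    ruffus_main(options, args)
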
def test_split_output(self):
    """test multiple-output checksums"""
    # outputs out of date
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([split1], verbose=0,
                 checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
    time.sleep(.5)
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [split1], verbose=6,
                          checksum_level=chksm, pipeline="main")
        self.assertIn('Job needs update:', s.getvalue())

    # all outputs incorrectly generated
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    time.sleep(.5)
    for f in split1_outputs:
        with open(f, 'w') as outfile:
            outfile.write('testme')
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [split1], verbose=6,
                          checksum_level=chksm, pipeline="main")
        if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('left over from a failed run?', s.getvalue())
        else:
            #self.assertIn('Job up-to-date', s.getvalue())
            pass

    # one output incorrectly generated
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([split1], verbose=0,
                 checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
    job_history = dbdict.open(get_default_history_file_name(), picklevalues=True)
    del job_history[os.path.relpath(split1_outputs[0])]
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [split1], verbose=6,
                          checksum_level=chksm, pipeline="main")
        if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('left over from a failed run?', s.getvalue())
        else:
            #self.assertIn('Job up-to-date', s.getvalue())
            pass

def test_combinations_with_replacement2_run(self):
    """Run combinations_with_replacement"""
    cleanup_tmpdir()
    pipeline_run([test_combinations_with_replacement2_merged_task], verbose=0,
                 multiprocess=100, one_second_per_job=one_second_per_job,
                 pipeline="main")
    with open(tempdir + "/merged.results") as oo:
        self.assertEqual(oo.read(), "aa,ab,ac,ad,bb,bc,bd,cc,cd,dd,")

def test_suffix_unmatched_run2(self):
    """Run transform(..., suffix()...)"""
    cleanup_tmpdir()
    pipeline_run([test_suffix_unmatched_task2], verbose=0,
                 multiprocess=parallelism, one_second_per_job=one_second_per_job)

def test_task(self):
    pipeline_run([task4], multiprocess=10, verbose=0, pipeline="main")
    correct_output = ("{tempdir}a.1.output:test_transform_inputs.py,"
                      "{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;"
                      "{tempdir}b.1.output:test_transform_inputs.py,"
                      "{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;").format(tempdir=tempdir)
    with open(tempdir + "final.output") as ff:
        real_output = ff.read()
    self.assertEqual(correct_output, real_output)

def test_ruffus(self):
    pipeline_run(multiprocess=50, verbose=0, pipeline="main")
    with open(os.path.join(data_dir, "summary.5")) as ii:
        active_text = ii.read()
    if active_text != expected_active_text:
        raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n"
                        % (expected_active_text, active_text))

def test_exception_logging(self):
    try:
        pipeline_run(multiprocess=50, verbose=0, pipeline="main")
    except ruffus.ruffus_exceptions.RethrownJobError as e:
        log.info(e)
        for exc in e.args:
            task_name, job_name, exc_name, exc_value, exc_stack = exc
        return
    raise Exception("Missing exception")

def test_collate(self):
    self.cleanup_tmpdir()
    s = StringIO()
    pipeline_printout(s, [combine_results], verbose=5, wrap_width=10000)
    self.assertTrue('Job needs update: Missing files\n' in s.getvalue())
    # print(s.getvalue())
    pipeline_run([combine_results], verbose=0)

def test_permutations2_run(self):
    """Run permutations"""
    cleanup_tmpdir()
    pipeline_run([check_permutations2_merged_task], verbose=0, multiprocess=100,
                 one_second_per_job=one_second_per_job, pipeline="main")
    with open(tempdir + "/merged.results") as oo:
        self.assertEqual(oo.read(), "ab,ac,ad,ba,bc,bd,ca,cb,cd,da,db,dc,")

def test_task(self):
    save_to_str_logger = t_save_to_str_logger()
    pipeline_run(multiprocess=10, logger=save_to_str_logger,
                 verbose=1, pipeline="main")
    self.assertTrue("@files() was empty" in save_to_str_logger.warning_str)
    print("\n Warning printed out correctly", file=sys.stderr)

def test_recreate_job_history(self):
    """Run"""
    global throw_exception
    throw_exception = None
    cleanup_tmpdir()

    # Initial run without creating the sqlite file
    pipeline_run([test_task4], verbose=0,
                 checksum_level=CHECKSUM_FILE_TIMESTAMPS, multithread=10,
                 one_second_per_job=one_second_per_job, pipeline="main")

    # printout without sqlite
    s = StringIO()
    pipeline_printout(s, [test_task4],
                      checksum_level=CHECKSUM_FILE_TIMESTAMPS, pipeline="main")
    self.assertTrue(not re.search(
        'Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))

    # printout expecting the sqlite file
    s = StringIO()
    pipeline_printout(s, [test_task4], pipeline="main")
    self.assertTrue(
        re.search('Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))

    # Regenerate the sqlite file
    pipeline_run([test_task4],
                 checksum_level=CHECKSUM_FILE_TIMESTAMPS,
                 history_file=get_default_history_file_name(),
                 multithread=1, verbose=0, touch_files_only=2,
                 one_second_per_job=one_second_per_job, pipeline="main")

    # printout expecting the sqlite file
    s = StringIO()
    pipeline_printout(s, [test_task4], verbose=VERBOSITY, pipeline="main")
    succeed = not re.search(
        'Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue())
    if not succeed:
        print(s.getvalue(), file=sys.stderr)
    self.assertTrue(succeed)
    throw_exception = False

def test_combinations_with_replacement2_run(self):
    """Run combinations_with_replacement"""
    cleanup_tmpdir()
    pipeline_run([test_combinations_with_replacement2_merged_task], verbose=0,
                 multiprocess=100, one_second_per_job=one_second_per_job)
    with open(workdir + "/merged.results") as oo:
        self.assertEqual(oo.read(), "aa,ab,ac,ad,bb,bc,bd,cc,cd,dd,")

def do_main():
    print("Press Ctrl-C Now!!", file=sys.stdout)
    sys.stdout.flush()
    time.sleep(2)
    print("Start....", file=sys.stdout)
    sys.stdout.flush()
    ruffus.pipeline_run(verbose=11, multiprocess=5, pipeline="main")
    print("too late!!", file=sys.stdout)
    sys.stdout.flush()
    cleanup_tmpdir()

def test_no_re_match(self):
    save_to_str_logger = t_save_to_str_logger()
    pipeline_run(multiprocess=10, logger=save_to_str_logger,
                 verbose=1, pipeline="main")
    print(save_to_str_logger.warning_str)
    self.assertTrue(
        "no file names matched" in save_to_str_logger.warning_str)
    print("\n Warning printed out correctly", file=sys.stderr)

def test_active_if_false(self):
    global pipeline_active_if
    pipeline_active_if = False
    pipeline_run(multiprocess=50, verbose=0, pipeline="main")
    with open("test_active_if/summary.5") as ii:
        inactive_text = ii.read()
    if inactive_text != expected_inactive_text:
        raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n"
                        % (expected_inactive_text, inactive_text))
    shutil.rmtree("test_active_if")

def test_active_if_true(self):
    global pipeline_active_if
    pipeline_active_if = True
    pipeline_run(multiprocess=50, verbose=0, pipeline="main")
    with open("test_active_if/summary.5") as ii:
        active_text = ii.read()
    if active_text != expected_active_text:
        raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n"
                        % (expected_active_text, active_text))

def main():
    parser = cmdline.get_argparse(description="Trench Run pipeline")
    args = parser.parse_args()
    if args.target_tasks:
        cmdline.run(args)
    else:
        pipeline_run([publish_data])

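# For context: cmdline.get_argparse() used above is ruffus' helper that returns
# an argparse parser pre-loaded with the standard pipeline flags (--target_tasks,
# --forced_tasks, --jobs, --verbose, --just_print, ...), and cmdline.run(options)
# dispatches to pipeline_run / pipeline_printout based on those flags. A hedged,
# self-contained sketch of the same pattern with a hypothetical one-task pipeline:
def sketch_cmdline_pipeline():
    from ruffus import cmdline, originate

    @originate(["published.txt"])
    def make_published_file(output_file):
        # placeholder job body: just create the output file
        open(output_file, "w").close()

    parser = cmdline.get_argparse(description="sketch pipeline")
    options = parser.parse_args()
    cmdline.run(options)
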
def test_combinations3_run(self):
    """Run combinations"""
    cleanup_tmpdir()
    pipeline_run([test_combinations3_merged_task], verbose=0, multiprocess=100,
                 one_second_per_job=one_second_per_job, pipeline="main")
    with open(tempdir + "/merged.results") as oo:
        self.assertEqual(oo.read(), "abc,abd,acd,bcd,")

def test_output_up_to_date(self):
    """Input file exists, output up to date"""
    # output is up to date -- not run for any levels
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [transform1], verbose=5, checksum_level=chksm)
        self.assertIn('Job up-to-date', s.getvalue())

def test_product_run(self):
    """Run product"""
    cleanup_tmpdir()
    pipeline_run([test_product_merged_task], verbose=0, multiprocess=100,
                 one_second_per_job=one_second_per_job)
    with open(workdir + "/merged.results") as oo:
        self.assertEqual(
            oo.read(),
            "aeg,aeh,afg,afh,beg,beh,bfg,bfh,ceg,ceh,cfg,cfh,deg,deh,dfg,dfh,")

def test_permutations3_run(self):
    """Run permutations"""
    cleanup_tmpdir()
    pipeline_run([test_permutations3_merged_task], verbose=0, multiprocess=100,
                 one_second_per_job=one_second_per_job)
    with open(workdir + "/merged.results") as oo:
        self.assertEqual(
            oo.read(),
            'abc,abd,acb,acd,adb,adc,bac,bad,bca,bcd,bda,bdc,'
            'cab,cad,cba,cbd,cda,cdb,dab,dac,dba,dbc,dca,dcb,')

def test_split_output(self):
    """test multiple-output checksums"""
    # outputs out of date
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([split1], verbose=0)
    time.sleep(.5)
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [split1], verbose=5, checksum_level=chksm)
        self.assertIn('Job needs update:', s.getvalue())

    # all outputs incorrectly generated
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    time.sleep(.5)
    for f in split1_outputs:
        with open(f, 'w') as outfile:
            outfile.write('testme')
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [split1], verbose=5, checksum_level=chksm)
        if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('Previous incomplete run leftover', s.getvalue())
        else:
            self.assertIn('Job up-to-date', s.getvalue())

    # one output incorrectly generated
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([split1], verbose=0)
    job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)
    del job_history[split1_outputs[0]]
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [split1], verbose=5, checksum_level=chksm)
        if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('Previous incomplete run leftover', s.getvalue())
        else:
            self.assertIn('Job up-to-date', s.getvalue())

def test_recreate_job_history(self):
    """Run"""
    global throw_exception
    throw_exception = None
    cleanup_tmpdir()

    # Initial run without creating the sqlite file
    pipeline_run([check_task4], verbose=0,
                 checksum_level=CHECKSUM_FILE_TIMESTAMPS, multithread=10,
                 one_second_per_job=one_second_per_job, pipeline="main")

    # printout without sqlite
    s = StringIO()
    pipeline_printout(s, [check_task4],
                      checksum_level=CHECKSUM_FILE_TIMESTAMPS, pipeline="main")
    self.assertTrue(not re.search(
        'Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))

    # printout expecting the sqlite file
    s = StringIO()
    pipeline_printout(s, [check_task4], pipeline="main")
    self.assertTrue(
        re.search('Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))

    # Regenerate the sqlite file
    pipeline_run([check_task4],
                 checksum_level=CHECKSUM_FILE_TIMESTAMPS,
                 history_file=get_default_history_file_name(),
                 multithread=1, verbose=0, touch_files_only=2,
                 one_second_per_job=one_second_per_job, pipeline="main")

    # printout expecting the sqlite file
    s = StringIO()
    pipeline_printout(s, [check_task4], verbose=VERBOSITY, pipeline="main")
    succeed = not re.search(
        'Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue())
    if not succeed:
        print(s.getvalue(), file=sys.stderr)
    self.assertTrue(succeed)
    throw_exception = False

def test_combinations_with_replacement3_run(self):
    """Run combinations_with_replacement"""
    cleanup_tmpdir()
    pipeline_run([check_combinations_with_replacement3_merged_task], verbose=0,
                 multiprocess=100, one_second_per_job=one_second_per_job,
                 pipeline="main")
    with open(tempdir + "/merged.results") as oo:
        self.assertEqual(
            oo.read(),
            'aaa,aab,aac,aad,abb,abc,abd,acc,acd,add,bbb,bbc,bbd,bcc,bcd,bdd,'
            'ccc,ccd,cdd,ddd,')

def test_collate(self):
    self.cleanup_tmpdir()
    s = StringIO()
    pipeline_printout(s, [combine_results], verbose=5,
                      wrap_width=10000, pipeline="main")
    self.assertTrue(
        re.search('Job needs update:.*Missing files.*',
                  s.getvalue(), re.DOTALL) is not None)
    # print(s.getvalue())
    pipeline_run([combine_results], verbose=0, pipeline="main")

def main():
    # because of ruffus, we have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config
    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    global samples
    G = PPR.gen_all_samples_from_soft_and_isamp
    samples = G(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(top_outdir, cmd_df,
                                               min_free, max_usage)

    logger.info('Selecting samples to process based on their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)
    if not samples:             # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for k, gsm in enumerate(samples):
        logger.info('\t{0:3d} {1:30s} {2}'.format(k + 1, gsm, gsm.outdir))

    if 'gen_qsub_script' in options.target_tasks:
        if not options.qsub_template:
            raise IOError(
                '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )

def test_ruffus(self):
    # run first
    pipeline_run(verbose=0)

    # should now be out of date
    s = StringIO()
    pipeline_printout(s, verbose=5)
    ret = s.getvalue()
    try:
        self.do_assertRegexpMatches(
            ret,
            r"Tasks which are up-to-date:(\n\s*)*"
            r"Task = 'test_check_if_uptodate.task1'(\n\s*)*"
            r"Task = 'test_check_if_uptodate.task2'")
    except:
        print("\n\tOops: Both tasks should be up to date!!\n\n")
        raise
    try:
        self.do_assertNotRegexpMatches(
            ret, r"Jobs needs update:\s*No function to check if up-to-date")
    except:
        print("\n\tOops: @check_if_uptodate is not being picked up!!\n\n")
        raise

def test_output_up_to_date_func_changed(self):
    """Input file exists, output up to date, function body changed"""
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
    # simulate a source change; on Python 3 the code object lives in __code__
    # (the original Python 2 snippet assigned func_code)
    transform1.__code__ = split1.__code__
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [transform1], verbose=5, checksum_level=chksm)
        if chksm >= CHECKSUM_FUNCTIONS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('Pipeline function has changed', s.getvalue())
        else:
            self.assertIn('Job up-to-date', s.getvalue())

def test_output_up_to_date_param_changed(self):
    """Input file exists, output up to date, parameter to function changed"""
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
    runtime_data.append('different')  # simulate a change to the config file
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [transform1], verbose=5, checksum_level=chksm)
        if chksm >= CHECKSUM_FUNCTIONS_AND_PARAMS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('Pipeline parameters have changed', s.getvalue())
        else:
            self.assertIn('Job up-to-date', s.getvalue())

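# Note on the comparisons above: ruffus' checksum levels form an ordered scale,
# which is what makes tests like `chksm >= CHECKSUM_FUNCTIONS` meaningful. The
# constant names are ruffus' own; how possible_chksms is built in these test
# files is an assumption:
#
#   CHECKSUM_FILE_TIMESTAMPS       file modification times only
#   CHECKSUM_HISTORY_TIMESTAMPS    + timestamps recorded in the job history db
#   CHECKSUM_FUNCTIONS             + checksum of the task function body
#   CHECKSUM_FUNCTIONS_AND_PARAMS  + checksum of the task parameters
#
# possible_chksms = range(CHECKSUM_FUNCTIONS_AND_PARAMS + 1)
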
def test_merge_output(self):
    """test multiple-input checksums"""
    # one output incorrectly generated
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([split1], verbose=0,
                 checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
    job_history = dbdict.open(get_default_history_file_name(), picklevalues=True)
    del job_history[os.path.relpath(split1_outputs[0])]
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [merge2], verbose=6,
                          checksum_level=chksm, pipeline="main")
        if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
            self.assertIn('Job needs update:', s.getvalue())
            self.assertIn('left over from a failed run?', s.getvalue())
        else:
            #self.assertIn('Job up-to-date', s.getvalue())
            pass

    # make sure the jobs run fine
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([merge2], verbose=0,
                 checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [merge2], verbose=6,
                          checksum_level=chksm, pipeline="main")
        #self.assertIn('Job up-to-date', s.getvalue())
        self.assertNotIn('Job needs update:', s.getvalue())
        self.assertNotIn('left over from a failed run?', s.getvalue())

def test_output_up_to_date(self):
    """Input file exists, output up to date"""
    # output is up to date -- not run for any levels
    cleanup_tmpdir()
    with open(input_file, 'w') as outfile:
        outfile.write('testme')
    pipeline_run([transform1], verbose=0,
                 checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
    for chksm in possible_chksms:
        s = StringIO()
        pipeline_printout(s, [transform1], verbose=6,
                          checksum_level=chksm, pipeline="main")
        #self.assertIn('Job up-to-date', s.getvalue())
        pass

def test_ruffus(self):
    # Run task 1 only
    print(" Run start_task only", file=sys.stderr)
    pipeline_run(log_exceptions=True, verbose=0, pipeline="main")

    # Run task 3 only
    print(" Run final_task: linked_file_name_task should run as well",
          file=sys.stderr)
    pipeline_run(log_exceptions=True, verbose=0, pipeline="main")

    # Run task 3 again: all jobs should be up to date
    print(" Run final_task again: All jobs should be up to date",
          file=sys.stderr)
    pipeline_run(log_exceptions=True, verbose=0, pipeline="main")

    # Make sure the right number of jobs / tasks ran
    for task_name, jobs_count in ({'start_task': 1,
                                   'final_task': 4,
                                   'linked_file_name_task': 2}).items():
        if task_name not in executed_tasks_proxy:
            raise Exception("Error: %s did not run!!" % task_name)
        if executed_tasks_proxy[task_name] != jobs_count:
            raise Exception("Error: %s did not have %d jobs!!"
                            % (task_name, jobs_count))
    if "same_file_name_task" in executed_tasks_proxy:
        raise Exception("Error: %s should not have run!!" % "same_file_name_task")