Example #1
 def test_transform_with_missing_formatter_args(self):
     s = StringIO()
     pipeline_printout(s, [transform_with_missing_formatter_args],
                       verbose=4, wrap_width=10000, pipeline="main")
     self.assertIn("Unmatched field {dynamic_message}", s.getvalue())
     pipeline_run([transform_with_missing_formatter_args],
                  verbose=0, pipeline="main")
Example #2
    def test_output_up_to_date_func_changed(self):
        """Input file exists, output up to date, function body changed"""
        # output is up to date, but function body changed (e.g., source different)
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([transform1], verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
        # simulate source change
        if sys.hexversion >= 0x03000000:
            split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
        else:
            split1.func_code, transform1.func_code = transform1.func_code, split1.func_code

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform1], verbose=6,
                              checksum_level=chksm, pipeline="main")
            if chksm >= CHECKSUM_FUNCTIONS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('Pipeline function has changed',
                              s.getvalue())
            else:
                #self.assertIn('Job up-to-date', s.getvalue())
                pass
        # clean up our function-changing mess!
        if sys.hexversion >= 0x03000000:
            split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
        else:
            split1.func_code, transform1.func_code = transform1.func_code, split1.func_code
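The swap above works because a Python function's compiled body lives in its __code__ attribute (func_code in Python 2) and can be reassigned at runtime; ruffus checksums that code object, so exchanging the two bodies looks like a source edit. A standalone illustration with throwaway functions:

    # swapping __code__ makes each function execute the other's body
    def f():
        return 1

    def g():
        return 2

    f.__code__, g.__code__ = g.__code__, f.__code__
    assert f() == 2 and g() == 1
    # swap back to undo the change, as the test does in its cleanup step
    f.__code__, g.__code__ = g.__code__, f.__code__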
Example #3
    def test_raises_error(self):
        """run a function that fails but creates output, then check what should run"""
        # start from a clean temporary directory
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        time.sleep(.5)
        del runtime_data[:]
        # Note: arguably this should be a RuntimeError rather than a RethrownJobError
        with self.assertRaises(RethrownJobError):
            # generates output then fails
            pipeline_run([transform_raise_error], verbose=0,
                         checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform_raise_error],
                              verbose=6, checksum_level=chksm, pipeline="main")
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('left over from a failed run?',
                              s.getvalue())
            else:
                #self.assertIn('Job up-to-date', s.getvalue())
                pass
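The possible_chksms sequence that these tests iterate is defined elsewhere. A plausible construction, assuming the four standard checksum-level constants exported by ruffus.ruffus_utility (ordered from least to most strict):

    from ruffus.ruffus_utility import (CHECKSUM_FILE_TIMESTAMPS,
                                       CHECKSUM_HISTORY_TIMESTAMPS,
                                       CHECKSUM_FUNCTIONS,
                                       CHECKSUM_FUNCTIONS_AND_PARAMS)

    # one entry per level: file timestamps only, then history-file timestamps,
    # then function-body checksums, then function-plus-parameter checksums
    possible_chksms = [CHECKSUM_FILE_TIMESTAMPS,
                       CHECKSUM_HISTORY_TIMESTAMPS,
                       CHECKSUM_FUNCTIONS,
                       CHECKSUM_FUNCTIONS_AND_PARAMS]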
Example #4
 def test_mkdir_run(self):
     """Run mkdir"""
     # start from a clean temporary directory
     cleanup_tmpdir()
     pipeline_run([test_transform, test_transform2],
                  verbose=6,
                  multiprocess=2)
Example #5
 def test_combinations3_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([test_combinations3_merged_task], verbose=0, multiprocess=100, one_second_per_job=one_second_per_job)
     self.assertEqual(open(workdir + "/merged.results").read(),
                      "abc,abd,acd,bcd,")
Example #6
    def test_newstyle_mkdir_run(self):
        test_pipeline = Pipeline("test")

        test_pipeline.split(task_func=generate_initial_files1,
                            input=1,
                            output=[
                                tempdir + "/" + prefix + "_name.tmp1"
                                for prefix in "abcd"
                            ])

        test_pipeline.transform(task_func=test_transform,
                                input=generate_initial_files1,
                                filter=formatter(),
                                output="{path[0]}/{basename[0]}.dir/{basename[0]}.tmp2")\
            .mkdir(tempdir + "/test1")\
            .mkdir(tempdir + "/test2")\
            .mkdir(generate_initial_files1, formatter(),
                        ["{path[0]}/{basename[0]}.dir", 3, "{path[0]}/{basename[0]}.dir2"])

        test_pipeline.mkdir(test_transform2, tempdir + "/test3")\
            .mkdir(generate_initial_files1, formatter(),
                    "{path[0]}/{basename[0]}.dir2")
        cleanup_tmpdir()
        pipeline_run([test_transform, test_transform2],
                     verbose=0,
                     multiprocess=2,
                     pipeline="main")
Example #7
 def test_permutations3_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([test_permutations3_merged_task], verbose=0, multiprocess=100, one_second_per_job=one_second_per_job)
     self.assertEqual(open(workdir + "/merged.results").read(),
                      'abc,abd,acb,acd,adb,adc,bac,bad,bca,bcd,bda,bdc,cab,cad,cba,cbd,cda,cdb,dab,dac,dba,dbc,dca,dcb,')
Example #8
    def test_raises_error(self):
        """run a function that fails but creates output, then check what should run"""
        # start from a clean temporary directory
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        time.sleep(.5)
        del runtime_data[:]
        # Note: arguably this should be a RuntimeError rather than a RethrownJobError
        with self.assertRaises(RethrownJobError):
            # generates output then fails
            pipeline_run([transform_raise_error],
                         verbose=0,
                         checksum_level=CHECKSUM_HISTORY_TIMESTAMPS,
                         pipeline="main")

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform_raise_error],
                              verbose=6,
                              checksum_level=chksm,
                              pipeline="main")
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('left over from a failed run?', s.getvalue())
            else:
                #self.assertIn('Job up-to-date', s.getvalue())
                pass
Example #9
 def test_job_history_with_exceptions_run(self):
     """Run"""
     for i in range(1):
         cleanup_tmpdir()
         try:
             pipeline_run(
                 [test_task4],
                 verbose=0,
                 #multithread = 2,
                 one_second_per_job=one_second_per_job,
                 pipeline="main")
         except:
             pass
         s = StringIO()
         pipeline_printout(s, [test_task4],
                           verbose=VERBOSITY,
                           wrap_width=10000,
                           pipeline="main")
         #
         # task 2 should be up to date because an exception was thrown in task 3
         #
         pipeline_printout_str = s.getvalue()
         correct_order = not re.search(
             'Tasks which will be run:.*\n(.*\n)*Task = test_task2',
             pipeline_printout_str)
         if not correct_order:
             print(pipeline_printout_str)
         self.assertTrue(correct_order)
         sys.stderr.write(".")
     print()
Example #10
 def test_combinations_with_replacement3_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([test_combinations_with_replacement3_merged_task], verbose=0, multiprocess=100, one_second_per_job=one_second_per_job)
     self.assertEqual(open(workdir + "/merged.results").read(),
                      'aaa,aab,aac,aad,abb,abc,abd,acc,acd,add,bbb,bbc,bbd,bcc,bcd,bdd,ccc,ccd,cdd,ddd,')
Example #11
    def test_task(self):
        pipeline_run([task4], multiprocess=10, verbose=0, pipeline="main")

        correct_output = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
            tempdir=tempdir)
        with open(tempdir + "final.output") as ff:
            real_output = ff.read()
        self.assertEqual(correct_output, real_output)
Example #12
    def test_merge_output(self):
        """test multiple-input checksums"""
        # one output incorrectly generated
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([split1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
        job_history = dbdict.open(get_default_history_file_name(), picklevalues=True)
        del job_history[os.path.relpath(split1_outputs[0])]

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [merge2], verbose=5, checksum_level=chksm)
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('Previous incomplete run leftover', s.getvalue())
            else:
                self.assertIn('Job up-to-date', s.getvalue())

        # make sure the jobs run fine
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([merge2], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [merge2], verbose=5, checksum_level=chksm)
            self.assertIn('Job up-to-date', s.getvalue())
            self.assertNotIn('Job needs update:', s.getvalue())
            self.assertNotIn('Previous incomplete run leftover', s.getvalue())
Example #13
 def test_no_re_match(self):
     try:
         pipeline_run(multiprocess=10, verbose=0, pipeline="main")
     except:
         return
     raise Exception(
         "Inputs(...) with multiple arguments should have thrown an exception")
Example #14
    def test_output_up_to_date_func_changed(self):
        """Input file exists, output up to date, function body changed"""
        # output is up to date, but function body changed (e.g., source different)
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([transform1],
                     verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS,
                     pipeline="main")
        # simulate source change
        if sys.hexversion >= 0x03000000:
            split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
        else:
            split1.func_code, transform1.func_code = transform1.func_code, split1.func_code

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform1],
                              verbose=6,
                              checksum_level=chksm,
                              pipeline="main")
            if chksm >= CHECKSUM_FUNCTIONS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('Pipeline function has changed', s.getvalue())
            else:
                #self.assertIn('Job up-to-date', s.getvalue())
                pass
        # clean up our function-changing mess!
        if sys.hexversion >= 0x03000000:
            split1.__code__, transform1.__code__ = transform1.__code__, split1.__code__
        else:
            split1.func_code, transform1.func_code = transform1.func_code, split1.func_code
Example #15
def ruffus_main(options, args):
    'Main entry point for ruffus pipelines'
    if options.just_print:
        pipeline_printout(
            sys.stdout,
            options.target_tasks,
            options.forced_tasks,
            verbose=options.verbose)
    elif options.flowchart:
        pipeline_printout_graph(
            open(options.flowchart, "w"),
            # use flowchart file name extension to decide flowchart format
            #   e.g. svg, jpg etc.
            os.path.splitext(options.flowchart)[1][1:],
            options.target_tasks,
            options.forced_tasks,
            no_key_legend=not options.key_legend_in_graph)
    else:
        pipeline_run(
            options.target_tasks,
            options.forced_tasks,
            multiprocess=options.jobs,
            logger=main_logger,
            verbose=options.verbose,
            touch_files_only=options.touch_only)
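ruffus_main expects an optparse-style (options, args) pair; the parser itself is not part of this snippet. A plausible wiring, where every flag name below is an assumption chosen to match the attributes ruffus_main reads:

    from optparse import OptionParser

    parser = OptionParser()
    # hypothetical flags mirroring options.just_print, options.flowchart, etc.
    parser.add_option("-n", "--just_print", action="store_true", default=False)
    parser.add_option("--flowchart", type="string", default=None)
    parser.add_option("--key_legend_in_graph", action="store_true", default=False)
    parser.add_option("--touch_only", action="store_true", default=False)
    parser.add_option("--target_tasks", action="append", default=[])
    parser.add_option("--forced_tasks", action="append", default=[])
    parser.add_option("-j", "--jobs", type="int", default=1)
    parser.add_option("-v", "--verbose", type="int", default=1)

    options, args = parser.parse_args()
    ruffus_main(options, args)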
Example #16
    def test_ruffus(self):
        pipeline_run(multiprocess=50, verbose=0, pipeline="main")

        with open(os.path.join(data_dir, "summary.5")) as ii:
            active_text = ii.read()
        if active_text != expected_active_text:
            raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n"  % (expected_active_text, active_text))
Example #17
 def test_product_run(self):
     """Run product"""
     # start from a clean temporary directory
     cleanup_tmpdir()
     pipeline_run([test_product_merged_task], verbose=0, multiprocess=100, one_second_per_job=one_second_per_job)
     self.assertEqual(open(workdir + "/merged.results").read(),
                      "aeg,aeh,afg,afh,beg,beh,bfg,bfh,ceg,ceh,cfg,cfh,deg,deh,dfg,dfh,")
Example #18
    def test_split_output(self):
        """test multiple-output checksums"""
        # outputs out of date
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([split1], verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
        time.sleep(.5)
        with open(input_file, 'w') as outfile:
            outfile.write('testme')

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [split1], verbose=6,
                              checksum_level=chksm, pipeline="main")
            self.assertIn('Job needs update:', s.getvalue())

        # all outputs incorrectly generated
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        time.sleep(.5)
        for f in split1_outputs:
            with open(f, 'w') as outfile:
                outfile.write('testme')
        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [split1], verbose=6,
                              checksum_level=chksm, pipeline="main")
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('left over from a failed run?',
                              s.getvalue())
            else:
                #self.assertIn('Job up-to-date', s.getvalue())
                pass

        # one output incorrectly generated
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([split1], verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS, pipeline="main")
        job_history = dbdict.open(
            get_default_history_file_name(), picklevalues=True)
        del job_history[os.path.relpath(split1_outputs[0])]

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [split1], verbose=6,
                              checksum_level=chksm, pipeline="main")
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('left over from a failed run?',
                              s.getvalue())
            else:
                #self.assertIn('Job up-to-date', s.getvalue())
                pass
Example #19
 def test_combinations_with_replacement2_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([test_combinations_with_replacement2_merged_task], verbose=0, multiprocess=100, one_second_per_job=one_second_per_job, pipeline="main")
     with open(tempdir + "/merged.results") as oo:
         self.assertEqual(oo.read(),
                      "aa,ab,ac,ad,bb,bc,bd,cc,cd,dd,")
Example #20
 def test_suffix_unmatched_run2(self):
     """Run transform(...,suffix()...)"""
     # start from a clean temporary directory
     cleanup_tmpdir()
     pipeline_run([test_suffix_unmatched_task2],
                  verbose=0,
                  multiprocess=parallelism,
                  one_second_per_job=one_second_per_job)
Example #21
    def test_no_re_match(self):

        save_to_str_logger = t_save_to_str_logger()
        pipeline_run(multiprocess=10, logger=save_to_str_logger, verbose=1, pipeline="main")

        print(save_to_str_logger.warning_str)
        self.assertTrue("no file names matched" in save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
Example #22
 def test_no_re_match(self):
     try:
         pipeline_run(multiprocess=10, verbose=0, pipeline="main")
     except:
         return
     raise Exception(
         "Inputs(...) with multiple arguments should have thrown an exception"
     )
Example #23
    def test_task(self):
        pipeline_run([task4], multiprocess=10, verbose=0, pipeline="main")

        correct_output = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
            tempdir=tempdir)
        with open(tempdir + "final.output") as ff:
            real_output = ff.read()
        self.assertEqual(correct_output, real_output)
Example #24
    def test_ruffus(self):
        pipeline_run(multiprocess=50, verbose=0, pipeline="main")

        with open(os.path.join(data_dir, "summary.5")) as ii:
            active_text = ii.read()
        if active_text != expected_active_text:
            raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" %
                            (expected_active_text, active_text))
Example #25
 def test_exception_logging(self):
     try:
         pipeline_run(multiprocess=50, verbose=0, pipeline="main")
     except ruffus.ruffus_exceptions.RethrownJobError as e:
         log.info(e)
         for exc in e.args:
             task_name, job_name, exc_name, exc_value, exc_stack = exc
         return
     raise Exception("Missing exception")
Example #26
    def test_task(self):

        save_to_str_logger = t_save_to_str_logger()
        pipeline_run(multiprocess=10,
                     logger=save_to_str_logger,
                     verbose=1,
                     pipeline="main")
        self.assertTrue("@files() was empty" in save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
Example #27
 def test_exception_logging(self):
     try:
         pipeline_run(multiprocess=50, verbose=0, pipeline="main")
     except ruffus.ruffus_exceptions.RethrownJobError as e:
         log.info(e)
         for exc in e.args:
             task_name, job_name, exc_name, exc_value, exc_stack = exc
         return
     raise Exception("Missing exception")
Example #28
    def test_active_if_true(self):
        global pipeline_active_if
        pipeline_active_if = True
        pipeline_run(multiprocess=50, verbose=0, pipeline="main")

        with open("test_active_if/summary.5") as ii:
            active_text = ii.read()
        if active_text != expected_active_text:
            raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n"  % (active_text, expected_active_text))
Example #29
    def test_collate(self):
        self.cleanup_tmpdir()

        s = StringIO()
        pipeline_printout(s, [combine_results], verbose=5, wrap_width=10000)
        self.assertTrue('Job needs update: Missing files\n' in s.getvalue())
        #print s.getvalue()

        pipeline_run([combine_results], verbose=0)
Example #30
    def test_collate(self):
        self.cleanup_tmpdir()

        s = StringIO()
        pipeline_printout(s, [combine_results], verbose=5, wrap_width=10000)
        self.assertTrue('Job needs update: Missing files\n' in s.getvalue())
        #print s.getvalue()

        pipeline_run([combine_results], verbose=0)
Example #31
 def test_active_if_false(self):
     global pipeline_active_if
     pipeline_active_if = False
     pipeline_run(multiprocess=50, verbose=0, pipeline="main")
     with open("test_active_if/summary.5") as ii:
         inactive_text = ii.read()
     if inactive_text != expected_inactive_text:
         raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" % (expected_inactive_text, inactive_text))
     shutil.rmtree("test_active_if")
Example #32
 def test_permutations2_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([check_permutations2_merged_task], verbose=0, multiprocess=100,
                  one_second_per_job=one_second_per_job, pipeline="main")
     with open(tempdir + "/merged.results") as oo:
         self.assertEqual(oo.read(),
                          "ab,ac,ad,ba,bc,bd,ca,cb,cd,da,db,dc,")
Example #33
    def test_collate(self):
        self.cleanup_tmpdir()

        s = StringIO()
        pipeline_printout(s, [combine_results], verbose=5, wrap_width=10000, pipeline="main")
        self.assertTrue(re.search('Job needs update:.*Missing files.*', s.getvalue(), re.DOTALL) is not None)
        #print s.getvalue()

        pipeline_run([combine_results], verbose=0, pipeline= "main")
Example #34
    def test_task(self):

        save_to_str_logger = t_save_to_str_logger()
        pipeline_run(multiprocess=10,
                     logger=save_to_str_logger,
                     verbose=1,
                     pipeline="main")
        self.assertTrue("@files() was empty" in save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
Example #35
    def test_recreate_job_history(self):
        """Run"""
        global throw_exception
        throw_exception = None
        cleanup_tmpdir()

        #
        #      print "Initial run without creating sqlite file"
        #
        pipeline_run([test_task4],
                     verbose=0,
                     checksum_level=CHECKSUM_FILE_TIMESTAMPS,
                     multithread=10,
                     one_second_per_job=one_second_per_job,
                     pipeline="main")

        #
        #   print "printout without sqlite"
        #
        s = StringIO()
        pipeline_printout(s, [test_task4],
                          checksum_level=CHECKSUM_FILE_TIMESTAMPS,
                          pipeline="main")
        self.assertTrue(not re.search(
            'Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))
        #
        # print "printout expecting sqlite file"
        #
        s = StringIO()
        pipeline_printout(s, [test_task4], pipeline="main")
        self.assertTrue(
            re.search('Tasks which will be run:.*\n(.*\n)*Task = ',
                      s.getvalue()))
        #
        #   print "Regenerate sqlite file"
        #
        pipeline_run([test_task4],
                     checksum_level=CHECKSUM_FILE_TIMESTAMPS,
                     history_file=get_default_history_file_name(),
                     multithread=1,
                     verbose=0,
                     touch_files_only=2,
                     one_second_per_job=one_second_per_job,
                     pipeline="main")
        #
        # print "printout expecting sqlite file"
        #
        s = StringIO()
        pipeline_printout(s, [test_task4], verbose=VERBOSITY, pipeline="main")
        succeed = not re.search('Tasks which will be run:.*\n(.*\n)*Task = ',
                                s.getvalue())
        if not succeed:
            print(s.getvalue(), file=sys.stderr)
        self.assertTrue(succeed)

        throw_exception = False
Example #36
 def test_combinations_with_replacement2_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([test_combinations_with_replacement2_merged_task],
                  verbose=0,
                  multiprocess=100,
                  one_second_per_job=one_second_per_job)
     with open(workdir + "/merged.results") as oo:
         self.assertEqual(oo.read(), "aa,ab,ac,ad,bb,bc,bd,cc,cd,dd,")
Example #37
def do_main():
    print("Press Ctrl-C Now!!", file=sys.stdout)
    sys.stdout.flush()
    time.sleep(2)
    print("Start....", file=sys.stdout)
    sys.stdout.flush()
    ruffus.pipeline_run(verbose=11, multiprocess=5, pipeline="main")
    print("too late!!", file=sys.stdout)
    sys.stdout.flush()
    cleanup_tmpdir()
Example #38
    def test_no_re_match(self):

        save_to_str_logger = t_save_to_str_logger()
        pipeline_run(multiprocess=10, logger=save_to_str_logger,
                     verbose=1, pipeline="main")

        print(save_to_str_logger.warning_str)
        self.assertTrue(
            "no file names matched" in save_to_str_logger.warning_str)
        print("\n    Warning printed out correctly", file=sys.stderr)
Example #39
 def test_active_if_false(self):
     global pipeline_active_if
     pipeline_active_if = False
     pipeline_run(multiprocess=50, verbose=0, pipeline="main")
     with open("test_active_if/summary.5") as ii:
         inactive_text = ii.read()
     if inactive_text != expected_inactive_text:
         raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" %
                         (expected_inactive_text, inactive_text))
     shutil.rmtree("test_active_if")
Example #40
    def test_active_if_true(self):
        global pipeline_active_if
        pipeline_active_if = True
        pipeline_run(multiprocess=50, verbose=0, pipeline="main")

        with open("test_active_if/summary.5") as ii:
            active_text = ii.read()
        if active_text != expected_active_text:
            raise Exception("Error:\n\tExpected\n%s\nInstead\n%s\n" %
                            (active_text, expected_active_text))
Example #41
 def test_transform_with_missing_formatter_args(self):
     s = StringIO()
     pipeline_printout(s, [transform_with_missing_formatter_args],
                       verbose=4,
                       wrap_width=10000,
                       pipeline="main")
     self.assertIn("Unmatched field {dynamic_message}", s.getvalue())
     pipeline_run([transform_with_missing_formatter_args],
                  verbose=0,
                  pipeline="main")
Example #42
def main():
    parser = cmdline.get_argparse(description="Trench Run pipeline")

    args = parser.parse_args()

    if args.target_tasks:
        cmdline.run(args)

    else:
        pipeline_run(publish_data)
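cmdline.run(args) drives pipeline_run, pipeline_printout, or pipeline_printout_graph according to the standard flags that cmdline.get_argparse registers. A minimal sketch of how publish_data might be defined and wired up; the task body and output file name are hypothetical:

    from ruffus import cmdline, originate

    @originate(["published.txt"])  # hypothetical output file
    def publish_data(output_file):
        with open(output_file, "w") as oo:
            oo.write("published\n")

    parser = cmdline.get_argparse(description="Trench Run pipeline")
    args = parser.parse_args()
    cmdline.run(args)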
Example #43
 def test_permutations2_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([check_permutations2_merged_task],
                  verbose=0,
                  multiprocess=100,
                  one_second_per_job=one_second_per_job,
                  pipeline="main")
     with open(tempdir + "/merged.results") as oo:
         self.assertEqual(oo.read(), "ab,ac,ad,ba,bc,bd,ca,cb,cd,da,db,dc,")
Example #44
 def test_combinations3_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([test_combinations3_merged_task],
                  verbose=0,
                  multiprocess=100,
                  one_second_per_job=one_second_per_job,
                  pipeline="main")
     with open(tempdir + "/merged.results") as oo:
         self.assertEqual(oo.read(), "abc,abd,acd,bcd,")
Example #45
def do_main():
    print("Press Ctrl-C Now!!", file=sys.stdout)
    sys.stdout.flush()
    time.sleep(2)
    print("Start....", file=sys.stdout)
    sys.stdout.flush()
    ruffus.pipeline_run(verbose=11,
                        multiprocess=5, pipeline="main")
    print("too late!!", file=sys.stdout)
    sys.stdout.flush()
    cleanup_tmpdir()
Example #46
    def test_ouput_up_to_date(self):
        """Input file exists, output up to date"""
        # output is up to date-- not run for any levels
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform1], verbose=5, checksum_level=chksm)
            self.assertIn('Job up-to-date', s.getvalue())
Example #47
 def test_product_run(self):
     """Run product"""
     # start from a clean temporary directory
     cleanup_tmpdir()
     pipeline_run([test_product_merged_task],
                  verbose=0,
                  multiprocess=100,
                  one_second_per_job=one_second_per_job)
     with open(workdir + "/merged.results") as oo:
         self.assertEqual(
             oo.read(),
             "aeg,aeh,afg,afh,beg,beh,bfg,bfh,ceg,ceh,cfg,cfh,deg,deh,dfg,dfh,"
         )
Example #48
 def test_permutations3_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([test_permutations3_merged_task],
                  verbose=0,
                  multiprocess=100,
                  one_second_per_job=one_second_per_job)
     with open(workdir + "/merged.results") as oo:
         self.assertEqual(
             oo.read(),
             'abc,abd,acb,acd,adb,adc,bac,bad,bca,bcd,bda,bdc,cab,cad,cba,cbd,cda,cdb,dab,dac,dba,dbc,dca,dcb,'
         )
Example #49
    def test_split_output(self):
        """test multiple-output checksums"""
        # outputs out of date
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([split1], verbose=0)
        time.sleep(.5)
        with open(input_file, 'w') as outfile:
            outfile.write('testme')

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [split1], verbose=5, checksum_level=chksm)
            self.assertIn('Job needs update:', s.getvalue())

        # all outputs incorrectly generated
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        time.sleep(.5)
        for f in split1_outputs:
            with open(f, 'w') as outfile:
                outfile.write('testme')
        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [split1], verbose=5, checksum_level=chksm)
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('Previous incomplete run leftover',
                              s.getvalue())
            else:
                self.assertIn('Job up-to-date', s.getvalue())

        # one output incorrectly generated
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([split1], verbose=0)
        job_history = dbdict.open(RUFFUS_HISTORY_FILE, picklevalues=True)
        del job_history[split1_outputs[0]]

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [split1], verbose=5, checksum_level=chksm)
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('Previous incomplete run leftover',
                              s.getvalue())
            else:
                self.assertIn('Job up-to-date', s.getvalue())
Example #50
    def test_recreate_job_history(self):
        """Run"""
        global throw_exception
        throw_exception = None
        cleanup_tmpdir()

        #
        #      print "Initial run without creating sqlite file"
        #
        pipeline_run([check_task4], verbose=0,
                     checksum_level=CHECKSUM_FILE_TIMESTAMPS,
                     multithread=10,
                     one_second_per_job=one_second_per_job, pipeline="main")

        #
        #   print "printout without sqlite"
        #
        s = StringIO()
        pipeline_printout(
            s, [check_task4], checksum_level=CHECKSUM_FILE_TIMESTAMPS, pipeline="main")
        self.assertTrue(not re.search(
            'Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))
        #
        # print "printout expecting sqlite file"
        #
        s = StringIO()
        pipeline_printout(s, [check_task4], pipeline="main")
        self.assertTrue(
            re.search('Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue()))
        #
        #   print "Regenerate sqlite file"
        #
        pipeline_run([check_task4],
                     checksum_level=CHECKSUM_FILE_TIMESTAMPS,
                     history_file=get_default_history_file_name(),
                     multithread=1,
                     verbose=0,
                     touch_files_only=2,
                     one_second_per_job=one_second_per_job, pipeline="main")
        #
        # print "printout expecting sqlite file"
        #
        s = StringIO()
        pipeline_printout(s, [check_task4], verbose=VERBOSITY, pipeline="main")
        succeed = not re.search(
            'Tasks which will be run:.*\n(.*\n)*Task = ', s.getvalue())
        if not succeed:
            print(s.getvalue(), file=sys.stderr)
        self.assertTrue(succeed)

        throw_exception = False
Example #51
 def test_combinations_with_replacement3_run(self):
     """Run product"""
     # output is up to date, but function body changed (e.g., source different)
     cleanup_tmpdir()
     pipeline_run([check_combinations_with_replacement3_merged_task],
                  verbose=0,
                  multiprocess=100,
                  one_second_per_job=one_second_per_job,
                  pipeline="main")
     with open(tempdir + "/merged.results") as oo:
         self.assertEqual(
             oo.read(),
             'aaa,aab,aac,aad,abb,abc,abd,acc,acd,add,bbb,bbc,bbd,bcc,bcd,bdd,ccc,ccd,cdd,ddd,'
         )
Example #52
    def test_collate(self):
        self.cleanup_tmpdir()

        s = StringIO()
        pipeline_printout(s, [combine_results],
                          verbose=5,
                          wrap_width=10000,
                          pipeline="main")
        self.assertTrue(
            re.search('Job needs update:.*Missing files.*', s.getvalue(),
                      re.DOTALL) is not None)
        # print s.getvalue()

        pipeline_run([combine_results], verbose=0, pipeline="main")
Example #53
def main():
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config
    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    global samples
    G = PPR.gen_all_samples_from_soft_and_isamp
    samples = G(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(top_outdir, cmd_df, min_free,
                                               max_usage)

    logger.info('Selecting samples to process based on their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:  # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for k, gsm in enumerate(samples):
        logger.info('\t{0:3d} {1:30s} {2}'.format(k + 1, gsm, gsm.outdir))

    if 'gen_qsub_script' in options.target_tasks:
        if not options.qsub_template:
            raise IOError(
                '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
Example #54
def main():
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config
    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    global samples
    G = PPR.gen_all_samples_from_soft_and_isamp
    samples = G(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based on their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:             # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return 

    logger.info('GSMs to process:')
    for k, gsm in enumerate(samples):
        logger.info('\t{0:3d} {1:30s} {2}'.format(k+1, gsm, gsm.outdir))

    if 'gen_qsub_script' in options.target_tasks:
        if not options.qsub_template:
            raise IOError('-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
Example #55
    def test_ruffus(self):
        # run first
        pipeline_run(verbose=0)
        # should now be out of date
        s = StringIO()
        pipeline_printout(s, verbose=5)

        ret = s.getvalue()
        try:
            self.do_assertRegexpMatches(ret, r"Tasks which are up-to-date:(\n\s*)*Task = 'test_check_if_uptodate.task1'(\n\s*)*Task = 'test_check_if_uptodate.task2'")
        except:
            print ("\n\tOops: Both tasks should be up to date!!\n\n")
            raise
        try:
            self.do_assertNotRegexpMatches(ret, r"Jobs needs update:\s*No function to check if up-to-date")
        except:
            print ("\n\tOops: @check_if_uptodate is not being picked up!!\n\n")
            raise
Example #56
    def test_ouput_up_to_date_func_changed(self):
        """Input file exists, output up to date, function body changed"""
        # output is up to date, but function body changed (e.g., source different)
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
        transform1.func_code = split1.func_code  # simulate source change

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform1], verbose=5, checksum_level=chksm)
            if chksm >= CHECKSUM_FUNCTIONS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('Pipeline function has changed',
                              s.getvalue())
            else:
                self.assertIn('Job up-to-date', s.getvalue())
Example #57
    def test_ouput_up_to_date_param_changed(self):
        """Input file exists, output up to date, parameter to function changed"""
        # output is up to date, but a parameter changed (e.g., config file different)
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([transform1], verbose=0, checksum_level=CHECKSUM_HISTORY_TIMESTAMPS)
        runtime_data.append('different')  # simulate change to config file

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform1], verbose=5, checksum_level=chksm)
            if chksm >= CHECKSUM_FUNCTIONS_AND_PARAMS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('Pipeline parameters have changed',
                              s.getvalue())
            else:
                self.assertIn('Job up-to-date', s.getvalue())
Example #58
    def test_merge_output(self):
        """test multiple-input checksums"""
        # one output incorrectly generated
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([split1],
                     verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS,
                     pipeline="main")
        job_history = dbdict.open(get_default_history_file_name(),
                                  picklevalues=True)
        del job_history[os.path.relpath(split1_outputs[0])]

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [merge2],
                              verbose=6,
                              checksum_level=chksm,
                              pipeline="main")
            if chksm >= CHECKSUM_HISTORY_TIMESTAMPS:
                self.assertIn('Job needs update:', s.getvalue())
                self.assertIn('left over from a failed run?', s.getvalue())
            else:
                #self.assertIn('Job up-to-date', s.getvalue())
                pass

        # make sure the jobs run fine
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([merge2],
                     verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS,
                     pipeline="main")
        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [merge2],
                              verbose=6,
                              checksum_level=chksm,
                              pipeline="main")
            #self.assertIn('Job up-to-date', s.getvalue())
            self.assertNotIn('Job needs update:', s.getvalue())
            self.assertNotIn('left over from a failed run?', s.getvalue())
Example #59
    def test_output_up_to_date(self):
        """Input file exists, output up to date"""
        # output is up to date-- not run for any levels
        cleanup_tmpdir()
        with open(input_file, 'w') as outfile:
            outfile.write('testme')
        pipeline_run([transform1],
                     verbose=0,
                     checksum_level=CHECKSUM_HISTORY_TIMESTAMPS,
                     pipeline="main")

        for chksm in possible_chksms:
            s = StringIO()
            pipeline_printout(s, [transform1],
                              verbose=6,
                              checksum_level=chksm,
                              pipeline="main")
            #self.assertIn('Job up-to-date', s.getvalue())
            pass
Example #60
    def test_ruffus(self):
        #
        #   Run task 1 only
        #
        print("    Run start_task only", file=sys.stderr)
        pipeline_run(log_exceptions=True, verbose=0, pipeline="main")

        #
        #   Run task 3 only
        #
        print(
            "    Run final_task: linked_file_name_task should run as well", file=sys.stderr)
        pipeline_run(log_exceptions=True, verbose=0, pipeline="main")

        #
        #   Run task 3 again:
        #
        #       All jobs should be up to date
        #
        print("    Run final_task again: All jobs should be up to date",
              file=sys.stderr)
        pipeline_run(log_exceptions=True, verbose=0, pipeline="main")

        #
        #   Make sure right number of jobs / tasks ran
        #
        for task_name, jobs_count in ({'start_task': 1, 'final_task': 4, 'linked_file_name_task': 2}).items():
            if task_name not in executed_tasks_proxy:
                raise Exception("Error: %s did not run!!" % task_name)
            if executed_tasks_proxy[task_name] != jobs_count:
                raise Exception("Error: %s did not have %d jobs!!" %
                                (task_name, jobs_count))
        if "same_file_name_task" in executed_tasks_proxy:
            raise Exception("Error: %s should not have run!!" %
                            "same_file_name_task")