Exemplo n.º 1
0
    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEquals(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w.add(b)
        w2.add(eb)
        logging.debug("RUNNING BROKEN WORKER")
        w2.run()
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
Exemplo n.º 2
0
    def test_worker_executes_fork_handler(self):
        class Stub(luigi.Task):
            def complete(self):
                return False

            def run(self):
                pass

        s = CentralPlannerScheduler()
        w = Worker(scheduler=s, worker_processes=2)
        w.add(Stub())
        w.run()

        time.sleep(0.1)

        worker_res = [self.worker_queue.get(), self.worker_queue.get(), self.worker_queue.get()]
        self.assertIn("both", worker_res)
        self.assertIn("master", worker_res)

        child_res = [self.child_queue.get(), self.child_queue.get(), self.child_queue.get()]
        self.assertIn("both", child_res)
        self.assertIn("child", child_res)

        self.assertTrue(self.worker_queue.empty())
        self.assertTrue(self.child_queue.empty())
Exemplo n.º 3
0
    def test_interleaved_workers3(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a
            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w  = Worker(scheduler=sch, worker_id='X', keep_alive=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, wait_interval=0.1)

        w.add(a)
        w2.add(b)

        threading.Thread(target=w.run).start()
        w2.run()

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()
Exemplo n.º 4
0
    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        w.add(b)
        w.run()
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
Exemplo n.º 5
0
    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()
Exemplo n.º 6
0
    def _run(self, dt, interval):
        w = Worker(scheduler=self.sch)
        tasks = self.gen_tasks(dt, interval)
        map(w.add, tasks)

        for t in filter(lambda x: isinstance(x, RunMetricRules), tasks):
            if t.handler == 'save_to_mongodb':
                if t.filter_rules:
                    filter_metrics_args = {"require": t, 'filter_rules': t.filter_rules}
                    # 过滤
                    filter_metrics = FilterMetrics(**filter_metrics_args)
                    require_task = filter_metrics
                    w.add(filter_metrics)
                else:
                    require_task = t

                # 保存
                kwargs = dict(**self.mongo_conf)
                kwargs.update({"require": require_task, "date": dt, 'app_id': self.app_id, 'interval': interval})
                save_metrics = SaveMetrics(**kwargs)
                w.add(save_metrics)
            else:
                kwargs = dict(**self.mongo_conf)
                kwargs.update({"require": t,
                               "date": dt,
                               'app_id': self.app_id,
                               'interval': interval,
                               'handler_func': t.handler})
                metric_handler = MetricsHandler(**kwargs)
                w.add(metric_handler)

        w.run()
Exemplo n.º 7
0
 def test_task_limit_exceeded(self):
     w = Worker()
     t = ForkBombTask(3, 2)
     w.add(t)
     w.run()
     self.assertFalse(t.complete())
     leaf_tasks = [ForkBombTask(3, 2, branch) for branch in [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1)]]
     self.assertEquals(3, sum(t.complete() for t in leaf_tasks), "should have gracefully completed as much as possible even though the single last leaf didn't get scheduled")
Exemplo n.º 8
0
class AssistantTest(unittest.TestCase):
    def run(self, result=None):
        self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)
        with Worker(scheduler=self.sch, worker_id='X') as w:
            self.w = w
            super(AssistantTest, self).run(result)

    def test_get_work(self):
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [d.task_id])

    def test_unimported_job_type(self):
        MODULE_CONTENTS = b'''
import luigi


class UnimportedTask(luigi.Task):
    def complete(self):
        return False
'''

        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [task.task_id])

        # check that it can import with the right module
        with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module:
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [task.task_id])
Exemplo n.º 9
0
class MultiprocessWorkerTest(unittest.TestCase):

    def setUp(self):
        self.scheduler = RemoteScheduler()
        self.scheduler.add_worker = Mock()
        self.scheduler.add_task = Mock()
        self.worker = Worker(scheduler=self.scheduler, worker_id='X', worker_processes=2)

    def tearDown(self):
        self.worker.stop()

    def test_positive_path(self):
        a = DummyTask("a")
        b = DummyTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [a, b]

        c = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(c))

        self.scheduler.get_work = Mock(side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)])

        self.assertTrue(self.worker.run())
        self.assertTrue(c.has_run)

    def test_path_with_task_failures(self):
        class FailingTask(DummyTask):

            def run(self):
                raise Exception("I am failing")

        a = FailingTask("a")
        b = FailingTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [a, b]

        c = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(c))

        self.scheduler.get_work = Mock(side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)])

        self.assertFalse(self.worker.run())
Exemplo n.º 10
0
 def test_disabled_shutdown_hook(self):
     w = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True)
     with w:
         try:
             # try to kill the worker!
             os.kill(os.getpid(), signal.SIGUSR1)
         except AttributeError:
             raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
         # try to kill the worker... AGAIN!
         t = SuicidalWorker(signal.SIGUSR1)
         w.add(t)
         w.run()
         # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
         self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [t.task_id])
Exemplo n.º 11
0
    def test_die_for_non_unique_pending(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a
            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w  = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

        w2.stop()
Exemplo n.º 12
0
    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))
Exemplo n.º 13
0
    def test_requires_exception(self):
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
Exemplo n.º 14
0
    def test_single_threaded_worker_doesnot_execute_fork_handler(self):
        class Stub(luigi.Task):
            def complete(self):
                return False

            def run(self):
                pass

        s = CentralPlannerScheduler()
        w = Worker(scheduler=s, worker_processes=1)
        w.add(Stub())
        w.run()

        time.sleep(0.1)

        self.assertTrue(self.worker_queue.empty())
        self.assertTrue(self.child_queue.empty())
Exemplo n.º 15
0
def invoke_task(godzilla_task, task_id=None, **kwargs):
    print godzilla_task, task_id, kwargs
    task = tasks[godzilla_task]

    kwargs['task_id'] = task_id
    task = task.as_luigi(**kwargs)

    w = Worker(scheduler=sch)
    w.add(task)

    try:
        w.run()
        if not w.run_succeeded:
            raise Exception("job failed")
        return task.output().path
    finally:
        w.stop()
Exemplo n.º 16
0
class AssistantTest(unittest.TestCase):
    def setUp(self):
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X').__enter__()
        self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)

    def tearDown(self):
        self.w.__exit__(None, None, None)

    def test_get_work(self):
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [str(d)])

    def test_unimported_job_type(self):
        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['UnimportedTask()'])

        # check that it can import with the right module
        task.task_module = 'dummy_test_module.not_imported'
        self.w.add(task)
        self.assertTrue(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('DONE', '').keys()), ['UnimportedTask()'])
Exemplo n.º 17
0
    def _test_context_manager(self, force_multiprocessing):
        CONTEXT_MANAGER_MODULE = b'''
class MyContextManager(object):
    def __init__(self, task_process):
        self.task = task_process.task
    def __enter__(self):
        assert not self.task.run_event.is_set(), "the task should not have run yet"
        self.task.enter_event.set()
        return self
    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        assert self.task.run_event.is_set(), "the task should have run"
        self.task.exit_event.set()
'''

        class DummyEventRecordingTask(luigi.Task):
            def __init__(self, *args, **kwargs):
                self.enter_event = multiprocessing.Event()
                self.exit_event = multiprocessing.Event()
                self.run_event = multiprocessing.Event()
                super(DummyEventRecordingTask, self).__init__(*args, **kwargs)

            def run(self):
                assert self.enter_event.is_set(), "the context manager should have been entered"
                assert not self.exit_event.is_set(), "the context manager should not have been exited yet"
                assert not self.run_event.is_set(), "the task should not have run yet"
                self.run_event.set()

            def complete(self):
                return self.run_event.is_set()

        with temporary_unloaded_module(CONTEXT_MANAGER_MODULE) as module_name:
            t = DummyEventRecordingTask()
            w = Worker(task_process_context=module_name + '.MyContextManager',
                       force_multiprocessing=force_multiprocessing)
            w.add(t)
            self.assertTrue(w.run())
            self.assertTrue(t.complete())
            self.assertTrue(t.enter_event.is_set())
            self.assertTrue(t.exit_event.is_set())
Exemplo n.º 18
0
class WorkerEmailTest(EmailTest):
    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def test_connection_error(self):
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEquals(self.last_email, None)
        worker.add(a)
        self.assertEquals(self.waits, sch._attempts - 1)
        self.assertNotEquals(self.last_email, None)
        self.assertEquals(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a,))

    def test_complete_error(self):
        class A(DummyTask):
            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEquals(self.last_email, None)
        self.worker.add(a)
        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.worker.run()
        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.assertFalse(a.has_run)

# We removed the non-boolean check due to bug, this should be brought back in after Christmas /nyman
#    def test_complete_return_value(self):
#        class A(DummyTask):
#            def complete(self):
#                return
#
#        a = A()
#        self.assertEquals(self.last_email, None)
#        self.worker.add(a)
#        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
#        self.worker.run()
#        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
#        self.assertFalse(a.has_run)

    def test_run_error(self):
        class A(luigi.Task):
            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEquals(self.last_email, None)
        self.worker.run()
        self.assertEquals(("Luigi: %s FAILED" % (a,)), self.last_email[0])

    def test_no_error(self):
        class A(DummyTask):
            pass
        a = A()
        self.assertEquals(self.last_email, None)
        self.worker.add(a)
        self.assertEquals(self.last_email, None)
        self.worker.run()
        self.assertEquals(self.last_email, None)
        self.assertTrue(a.complete())
Exemplo n.º 19
0
def luigi_run(task):
    sch = RemoteScheduler()
    w = Worker(scheduler=sch)
    w.add(task)
    w.run()
Exemplo n.º 20
0
 def test_task_limit_not_exceeded(self):
     w = Worker()
     t = ForkBombTask(3, 2)
     w.add(t)
     w.run()
     self.assertTrue(t.complete())
Exemplo n.º 21
0
class WorkerTest(unittest.TestCase):
    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run
        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_external_dep(self):
        class A(ExternalTask):
            def complete(self):
                return False
        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.w.add(b_a)
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.w2.add(b_c)

        self.w.run()  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEquals(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w.add(b)
        w2.add(eb)
        logging.debug("RUNNING BROKEN WORKER")
        w2.run()
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEquals(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w2.add(eb)
        w.add(b)

        w2.run()
        self.assertFalse(b.complete())
        w.run()
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers3(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a
            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w  = Worker(scheduler=sch, worker_id='X', keep_alive=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, wait_interval=0.1)

        w.add(a)
        w2.add(b)

        threading.Thread(target=w.run).start()
        w2.run()

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        w.add(b)
        w.run()
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
Exemplo n.º 22
0
class WorkerTest(unittest.TestCase):

    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run
        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        self.w.handle_interrupt(signal.SIGUSR1, None)
        self.w.run()
        self.assertFalse(d.complete())

    def test_external_dep(self):
        class A(ExternalTask):

            def complete(self):
                return False
        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        class A(Task):

            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a
            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_avoid_infinite_reschedule(self):
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers3(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

        w2.stop()

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()

    def test_requires_exception(self):
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
Exemplo n.º 23
0
 def test_no_task_limit(self):
     w = Worker()
     t = ForkBombTask(4, 2)
     w.add(t)
     w.run()
     self.assertTrue(t.complete())
Exemplo n.º 24
0
class WorkerEmailTest(EmailTest):
    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        self.worker.stop()

    @with_config(EMAIL_CONFIG)
    def test_connection_error(self):
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEquals(self.last_email, None)
        worker.add(a)
        self.assertEquals(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEquals(self.last_email, None)
        self.assertEquals(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a,))
        worker.stop()

    @with_config(EMAIL_CONFIG)
    def test_complete_error(self):
        class A(DummyTask):
            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEquals(self.last_email, None)
        self.worker.add(a)
        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.worker.run()
        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.assertFalse(a.has_run)

    @with_config(EMAIL_CONFIG)
    def test_complete_return_value(self):
        class A(DummyTask):
            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEquals(self.last_email, None)
        self.worker.add(a)
        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.worker.run()
        self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.assertFalse(a.has_run)

    @with_config(EMAIL_CONFIG)
    def test_run_error(self):
        class A(luigi.Task):
            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEquals(self.last_email, None)
        self.worker.run()
        self.assertEquals(("Luigi: %s FAILED" % (a,)), self.last_email[0])

    def test_no_error(self):
        class A(DummyTask):
            pass
        a = A()
        self.assertEquals(self.last_email, None)
        self.worker.add(a)
        self.assertEquals(self.last_email, None)
        self.worker.run()
        self.assertEquals(self.last_email, None)
        self.assertTrue(a.complete())
Exemplo n.º 25
0
class ExceptionFormatTest(unittest.TestCase):

    def setUp(self):
        self.sch = CentralPlannerScheduler()
        self.w = Worker(scheduler=self.sch)

    def tear_down(self):
        self.w.stop()

    def test_fail_run(self):
        task = FailRunTask(foo='foo', bar='bar')
        self._run_task(task)

    def test_fail_run_html(self):
        task = FailRunTask(foo='foo', bar='bar')
        self._run_task_html(task)

    def test_fail_schedule(self):
        task = FailSchedulingTask(foo='foo', bar='bar')
        self._run_task(task)

    def test_fail_schedule_html(self):
        task = FailSchedulingTask(foo='foo', bar='bar')
        self._run_task_html(task)

    @with_config({'core': {'error-email': '*****@*****.**',
                           'email-prefix': '[TEST] '}})
    @mock.patch('luigi.notifications.send_error_email')
    def _run_task(self, task, mock_send):
        self.w.add(task)
        self.w.run()

        self.assertEqual(mock_send.call_count, 1)
        args, kwargs = mock_send.call_args
        self._check_subject(args[0], task)
        self._check_body(args[1], task, html=False)

    @with_config({'core': {'error-email': '*****@*****.**',
                           'email-prefix': '[TEST] ',
                           'email-type': 'html'}})
    @mock.patch('luigi.notifications.send_error_email')
    def _run_task_html(self, task, mock_send):
        self.w.add(task)
        self.w.run()

        self.assertEqual(mock_send.call_count, 1)
        args, kwargs = mock_send.call_args
        self._check_subject(args[0], task)
        self._check_body(args[1], task, html=True)

    def _check_subject(self, subject, task):
        self.assertIn(task.task_id, subject)

    def _check_body(self, body, task, html=False):
        if html:
            self.assertIn('<th>name</th><td>{}</td>'.format(task.task_family), body)
            self.assertIn('<div class="highlight"', body)
            self.assertIn('Oops!', body)

            for param, value in task.param_kwargs.items():
                self.assertIn('<th>{}</th><td>{}</td>'.format(param, value), body)
        else:
            self.assertIn('Name: {}\n'.format(task.task_family), body)
            self.assertIn('Parameters:\n', body)
            self.assertIn('TestException: Oops!', body)

            for param, value in task.param_kwargs.items():
                self.assertIn('{}: {}\n'.format(param, value), body)

    @with_config({"core": {"error-email": "[email protected]"}})
    def testEmailRecipients(self):
        six.assertCountEqual(self, notifications._email_recipients(), ["[email protected]"])
        six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]", "[email protected]"])
        six.assertCountEqual(self, notifications._email_recipients(["[email protected]", "[email protected]"]),
                             ["[email protected]", "[email protected]", "[email protected]"])

    @with_config({"core": {}}, replace_sections=True)
    def testEmailRecipientsNoConfig(self):
        six.assertCountEqual(self, notifications._email_recipients(), [])
        six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]"])
        six.assertCountEqual(self, notifications._email_recipients(["[email protected]", "[email protected]"]),
                             ["[email protected]", "[email protected]"])
Exemplo n.º 26
0
 def test_task_limit_not_exceeded(self):
     w = Worker()
     t = ForkBombTask(3, 2)
     w.add(t)
     w.run()
     self.assertTrue(t.complete())
Exemplo n.º 27
0
class WorkerTest(unittest.TestCase):

    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run
        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        try:
            self.w.handle_interrupt(signal.SIGUSR1, None)
        except AttributeError:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
        self.w.run()
        self.assertFalse(d.complete())

    def test_external_dep(self):
        class A(ExternalTask):

            def complete(self):
                return False
        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_tracking_url(self):
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self, tracking_url_callback=None):
                if tracking_url_callback is not None:
                    tracking_url_callback(tracking_url)
                self.has_run = True

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertTrue(self.w.run())
        tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(tasks))
        self.assertEqual(tracking_url, tasks['A()']['tracking_url'])

    def test_type_error_in_tracking_run(self):
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, a.num_runs)

    def test_fail(self):
        class CustomException(BaseException):
            def __init__(self, msg):
                self.msg = msg

        class A(Task):

            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a
            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_gets_missed_work(self):
        class A(Task):
            done = False

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        a = A()
        self.assertTrue(self.w.add(a))

        # simulate a missed get_work response
        self.assertEqual('A()', self.sch.get_work(worker='X')['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(a.complete())

    def test_avoid_infinite_reschedule(self):
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_fails_registering_signal(self):
        with mock.patch('luigi.worker.signal', spec=['signal']):
            # mock will raise an attribute error getting signal.SIGUSR1
            Worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers3(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

        w2.stop()

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()

    def test_requires_exception(self):
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class D(DummyTask):
            pass

        d = D()

        class C(DummyTask):
            def requires(self):
                return d

        c = C()

        class B(DummyTask):

            def requires(self):
                return c, a

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertTrue(d.has_run)
        self.assertFalse(a.has_run)
        w.stop()
Exemplo n.º 28
0
class WorkerEmailTest(unittest.TestCase):

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        self.worker.stop()

    @email_patch
    def test_connection_error(self, emails):
        sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEquals(emails, [])
        self.assertTrue(emails[0].find("Luigi: Framework error while scheduling %s" % (a,)) != -1)
        worker.stop()

    @email_patch
    def test_complete_error(self, emails):
        class A(DummyTask):

            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.assertFalse(a.has_run)

    @email_patch
    def test_complete_return_value(self, emails):
        class A(DummyTask):

            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.assertFalse(a.has_run)

    @email_patch
    def test_run_error(self, emails):
        class A(luigi.Task):

            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s FAILED" % (a,)) != -1)

    @email_patch
    def test_no_error(self, emails):
        class A(DummyTask):
            pass
        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertEqual(emails, [])
        self.assertTrue(a.complete())
Exemplo n.º 29
0
class WorkerEmailTest(unittest.TestCase):

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo").__enter__()

    def tearDown(self):
        self.worker.__exit__(None, None, None)

    @email_patch
    def test_connection_error(self, emails):
        sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        with Worker(scheduler=sch) as worker:
            worker.add(a)
            self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
            self.assertNotEqual(emails, [])
            self.assertTrue(emails[0].find("Luigi: Framework error while scheduling %s" % (a,)) != -1)

    @email_patch
    def test_complete_error(self, emails):
        class A(DummyTask):

            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.assertFalse(a.has_run)

    @email_patch
    def test_requires_error(self, emails):
        class A(DummyTask):

            def requires(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.worker.run()
        self.assertFalse(a.has_run)

    @email_patch
    def test_complete_return_value(self, emails):
        class A(DummyTask):

            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1)
        self.assertFalse(a.has_run)

    @email_patch
    def test_run_error(self, emails):
        class A(luigi.Task):

            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s FAILED" % (a,)) != -1)

    @email_patch
    def test_no_error(self, emails):
        class A(DummyTask):
            pass
        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertEqual(emails, [])
        self.assertTrue(a.complete())

    @custom_email_patch({"core": {"error-email": "not-a-real-email-address-for-test-only", 'email-type': 'none'}})
    def test_disable_emails(self, emails):
        class A(luigi.Task):

            def complete(self):
                raise Exception("b0rk")

        self.worker.add(A())
        self.assertEqual(emails, [])
Exemplo n.º 30
0
class WorkerTest(unittest.TestCase):
    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100,
                                           remove_delay=1000,
                                           worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        self.w.handle_interrupt(signal.SIGUSR1, None)
        self.w.run()
        self.assertFalse(d.complete())

    def test_external_dep(self):
        class A(ExternalTask):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(
            self.w.run()
        )  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):
            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):
                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_avoid_infinite_reschedule(self):
        class A(Task):
            def complete(self):
                return False

        class B(Task):
            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):
            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers3(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)

        w = Worker(scheduler=sch,
                   worker_id='X',
                   keep_alive=True,
                   count_uniques=True)
        w2 = Worker(scheduler=sch,
                    worker_id='Y',
                    keep_alive=True,
                    count_uniques=True,
                    wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)

        w = Worker(scheduler=sch,
                   worker_id='X',
                   keep_alive=True,
                   count_uniques=True)
        w2 = Worker(scheduler=sch,
                    worker_id='Y',
                    keep_alive=True,
                    count_uniques=True,
                    wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

        w2.stop()

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"

        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()

    def test_requires_exception(self):
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
Exemplo n.º 31
0
class WorkerEmailTest(unittest.TestCase):
    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        self.worker.stop()

    @email_patch
    def test_connection_error(self, emails):
        sch = RemoteScheduler(host="this_host_doesnt_exist",
                              port=1337,
                              connect_timeout=1)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEquals(emails, [])
        self.assertTrue(
            emails[0].find("Luigi: Framework error while scheduling %s" %
                           (a, )) != -1)
        worker.stop()

    @email_patch
    def test_complete_error(self, emails):
        class A(DummyTask):
            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" %
                                       (a, )) != -1)
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" %
                                       (a, )) != -1)
        self.assertFalse(a.has_run)

    @email_patch
    def test_complete_return_value(self, emails):
        class A(DummyTask):
            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" %
                                       (a, )) != -1)
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s failed scheduling" %
                                       (a, )) != -1)
        self.assertFalse(a.has_run)

    @email_patch
    def test_run_error(self, emails):
        class A(luigi.Task):
            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertTrue(emails[0].find("Luigi: %s FAILED" % (a, )) != -1)

    @email_patch
    def test_no_error(self, emails):
        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertEqual(emails, [])
        self.assertTrue(a.complete())
Exemplo n.º 32
0
class WorkerTest(unittest.TestCase):
    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run
        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_external_dep(self):
        class A(ExternalTask):
            def complete(self):
                return False
        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):
            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):
                def requires(self):
                    return a
            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_dynamic_dependencies(self):

        class DynamicRequires(Task):
            p = luigi.Parameter()

            def output(self):
                return luigi.LocalTarget(os.path.join(self.p, 'parent'))

            def run(self):
                dummy_targets = yield [DynamicDummyTask(os.path.join(self.p, str(i)))
                                     for i in range(5)]
                dummy_targets += yield [DynamicDummyTask(os.path.join(self.p, str(i)))
                                       for i in range(5, 7)]
                with self.output().open('w') as f:
                    for i, d in enumerate(dummy_targets):
                        for line in d.open('r'):
                            print >>f, '%d: %s' % (i, line.strip())

        t = DynamicRequires(p=tempfile.mktemp())
        luigi.build([t], local_scheduler=True)
        self.assertTrue(t.complete())

        # loop through output and verify
        f = t.output().open('r')
        for i in xrange(7):
            self.assertEqual(f.readline().strip(), '%d: Done!' % i)

    def test_avoid_infinite_reschedule(self):
        class A(Task):
            def complete(self):
                return False

        class B(Task):
            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers3(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a
            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w  = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a
            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w  = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

        w2.stop()

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()

    def test_requires_exception(self):
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
Exemplo n.º 33
0
class WorkerTest(unittest.TestCase):
    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100,
                                           remove_delay=1000,
                                           worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_external_dep(self):
        class A(ExternalTask):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.w.add(b_a)
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.w2.add(b_c)

        self.w.run(
        )  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEquals(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w.add(b)
        w2.add(eb)
        logging.debug("RUNNING BROKEN WORKER")
        w2.run()
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEquals(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w2.add(eb)
        w.add(b)

        w2.run()
        self.assertFalse(b.complete())
        w.run()
        self.assertTrue(b.complete())
Exemplo n.º 34
0
 def test_no_task_limit(self):
     w = Worker()
     t = ForkBombTask(4, 2)
     w.add(t)
     w.run()
     self.assertTrue(t.complete())
Exemplo n.º 35
0
class MultiprocessWorkerTest(unittest.TestCase):

    def setUp(self):
        self.scheduler = RemoteScheduler()
        self.scheduler.add_worker = Mock()
        self.scheduler.add_task = Mock()
        self.worker = Worker(scheduler=self.scheduler, worker_id='X', worker_processes=2).__enter__()

    def tearDown(self):
        self.worker.__exit__(None, None, None)

    def gw_res(self, pending, task_id):
        return dict(n_pending_tasks=pending,
                    task_id=task_id,
                    running_tasks=0, n_unique_pending=0)

    def test_positive_path(self):
        a = DummyTask("a")
        b = DummyTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [a, b]

        c = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(c))

        self.scheduler.get_work = Mock(side_effect=[self.gw_res(3, str(a)),
                                                    self.gw_res(2, str(b)),
                                                    self.gw_res(1, str(c)),
                                                    self.gw_res(0, None),
                                                    self.gw_res(0, None)])

        self.assertTrue(self.worker.run())
        self.assertTrue(c.has_run)

    def test_path_with_task_failures(self):
        class FailingTask(DummyTask):

            def run(self):
                raise Exception("I am failing")

        a = FailingTask("a")
        b = FailingTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [a, b]

        c = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(c))

        self.scheduler.get_work = Mock(side_effect=[self.gw_res(3, str(a)),
                                                    self.gw_res(2, str(b)),
                                                    self.gw_res(1, str(c)),
                                                    self.gw_res(0, None),
                                                    self.gw_res(0, None)])

        self.assertFalse(self.worker.run())