Пример #1
0
    def test_not_yet_started(self):
        """With only a creation time (04:00) set, check the time left in
        the hour for several values of ``now``."""
        flow = MockEmrObject(
            creationdatetime=to_iso8601(datetime(2010, 6, 6, 4)))

        # (value passed as ``now``, expected time left in the hour)
        cases = (
            (datetime(2010, 6, 6, 4, 35), timedelta(minutes=25)),
            (datetime(2010, 6, 6, 5, 20), timedelta(minutes=40)),
            # ``now`` equal to the creation time: a full hour remains
            (datetime(2010, 6, 6, 4), timedelta(minutes=60)),
        )

        for now, expected in cases:
            self.assertEqual(est_time_to_hour(flow, now=now), expected)
Пример #2
0
    def test_not_yet_started(self):
        """A job flow that only has a creation time (04:00) should report
        the expected time left in the hour for each ``now``."""
        created_at = to_iso8601(datetime(2010, 6, 6, 4))
        flow = MockEmrObject(creationdatetime=created_at)

        # 35 minutes after creation
        remaining = est_time_to_hour(flow, now=datetime(2010, 6, 6, 4, 35))
        self.assertEqual(remaining, timedelta(minutes=25))

        # 80 minutes after creation, i.e. 20 minutes into the next hour
        remaining = est_time_to_hour(flow, now=datetime(2010, 6, 6, 5, 20))
        self.assertEqual(remaining, timedelta(minutes=40))

        # ``now`` is exactly the creation time
        remaining = est_time_to_hour(flow, now=datetime(2010, 6, 6, 4))
        self.assertEqual(remaining, timedelta(minutes=60))
Пример #3
0
    def test_now_is_automatically_set(self):
        """Call est_time_to_hour() without ``now`` and expect just under an
        hour left for a job flow created (or started) a moment ago."""
        # job flow with only a creation time, set to the current UTC time
        created = to_iso8601(datetime.utcnow())
        fresh = MockEmrObject(creationdatetime=created)

        remaining = est_time_to_hour(fresh)

        self.assertLessEqual(remaining, timedelta(minutes=60))
        self.assertGreater(remaining, timedelta(minutes=59))

        # job flow created a minute ago but started at the current UTC time
        created = to_iso8601(datetime.utcnow() - timedelta(minutes=1))
        started = to_iso8601(datetime.utcnow())
        fresh_started = MockEmrObject(creationdatetime=created,
                                      startdatetime=started)

        remaining = est_time_to_hour(fresh_started)

        self.assertLessEqual(remaining, timedelta(minutes=60))
        self.assertGreater(remaining, timedelta(minutes=59))
Пример #4
0
    def test_now_is_automatically_set(self):
        """Without an explicit ``now``, a job flow created (or started) a
        moment ago should have between 59 and 60 minutes left."""

        def check_just_under_an_hour(remaining):
            # upper bound is inclusive, lower bound is exclusive
            self.assertLessEqual(remaining, timedelta(minutes=60))
            self.assertGreater(remaining, timedelta(minutes=59))

        # only a creation time, set to the current UTC time
        created_now = MockEmrObject(
            creationdatetime=to_iso8601(datetime.utcnow()))
        check_just_under_an_hour(est_time_to_hour(created_now))

        # created a minute ago, started at the current UTC time
        a_minute_ago = datetime.utcnow() - timedelta(minutes=1)
        started_now = MockEmrObject(
            creationdatetime=to_iso8601(a_minute_ago),
            startdatetime=to_iso8601(datetime.utcnow()))
        check_just_under_an_hour(est_time_to_hour(started_now))
 def test_currently_running(self):
     """Check each status helper against the 'j-CURRENTLY_RUNNING' mock
     job flow."""
     flow = self.mock_emr_job_flows['j-CURRENTLY_RUNNING']

     # state flags
     self.assertEqual(is_job_flow_done(flow), False)
     self.assertEqual(is_job_flow_running(flow), True)
     self.assertEqual(is_job_flow_non_streaming(flow), False)

     # timing
     idle = time_job_flow_idle(flow, self.now)
     self.assertEqual(idle, timedelta(0))
     to_hour = est_time_to_hour(flow, self.now)
     self.assertEqual(to_hour, timedelta(minutes=45))

     # pool membership
     self.assertEqual(pool_hash_and_name(flow), (None, None))
Пример #6
0
    def test_clock_skew(self):
        """``now`` one second before the start time should leave one
        second in the hour."""
        # make sure something reasonable happens if now is before
        # the start time
        created = to_iso8601(datetime(2010, 6, 6, 4))
        started = to_iso8601(datetime(2010, 6, 6, 4, 26))
        skewed_flow = MockEmrObject(
            creationdatetime=created, startdatetime=started)

        just_before_start = datetime(2010, 6, 6, 4, 25, 59)
        self.assertEqual(
            est_time_to_hour(skewed_flow, now=just_before_start),
            timedelta(seconds=1))
    def test_idle_and_failed(self):
        """Check each status helper against the 'j-IDLE_AND_FAILED' mock
        job flow."""
        flow = self.mock_emr_job_flows['j-IDLE_AND_FAILED']

        # state flags
        self.assertEqual(is_job_flow_done(flow), False)
        self.assertEqual(is_job_flow_running(flow), False)
        self.assertEqual(is_job_flow_non_streaming(flow), False)

        # timing
        idle = time_job_flow_idle(flow, self.now)
        self.assertEqual(idle, timedelta(hours=3))
        to_hour = est_time_to_hour(flow, self.now)
        self.assertEqual(to_hour, timedelta(hours=1))

        # pool membership
        self.assertEqual(pool_hash_and_name(flow), (None, None))
    def test_hadoop_debugging_job_flow(self):
        """Check each status helper against the 'j-HADOOP_DEBUGGING' mock
        job flow."""
        debugging_flow = self.mock_emr_job_flows['j-HADOOP_DEBUGGING']

        done = is_job_flow_done(debugging_flow)
        self.assertEqual(done, False)

        running = is_job_flow_running(debugging_flow)
        self.assertEqual(running, False)

        non_streaming = is_job_flow_non_streaming(debugging_flow)
        self.assertEqual(non_streaming, False)

        idle = time_job_flow_idle(debugging_flow, self.now)
        self.assertEqual(idle, timedelta(hours=2))

        to_hour = est_time_to_hour(debugging_flow, self.now)
        self.assertEqual(to_hour, timedelta(hours=1))

        pool = pool_hash_and_name(debugging_flow)
        self.assertEqual(pool, (None, None))
    def test_hive_job_flow(self):
        """Check each status helper against the 'j-HIVE' mock job flow,
        which is expected to count as non-streaming."""
        hive_flow = self.mock_emr_job_flows['j-HIVE']

        # state flags
        self.assertEqual(is_job_flow_done(hive_flow), False)
        self.assertEqual(is_job_flow_running(hive_flow), False)
        self.assertEqual(is_job_flow_non_streaming(hive_flow), True)

        # timing
        idle = time_job_flow_idle(hive_flow, self.now)
        self.assertEqual(idle, timedelta(hours=4))
        to_hour = est_time_to_hour(hive_flow, self.now)
        self.assertEqual(to_hour, timedelta(hours=1))

        # pool membership
        self.assertEqual(pool_hash_and_name(hive_flow), (None, None))
Пример #10
0
    def test_clock_skew(self):
        """A job flow started at 04:26, checked at 04:25:59, should report
        one second left."""
        # make sure something reasonable happens if now is before
        # the start time
        flow = MockEmrObject(
            creationdatetime=to_iso8601(datetime(2010, 6, 6, 4)),
            startdatetime=to_iso8601(datetime(2010, 6, 6, 4, 26)))

        remaining = est_time_to_hour(flow, now=datetime(2010, 6, 6, 4, 25, 59))

        self.assertEqual(remaining, timedelta(seconds=1))
    def test_pooled(self):
        """Check each status helper against the 'j-POOLED' mock job flow,
        including its pool hash and name."""
        pooled = self.mock_emr_job_flows['j-POOLED']

        # state flags
        self.assertEqual(is_job_flow_done(pooled), False)
        self.assertEqual(is_job_flow_running(pooled), False)
        self.assertEqual(is_job_flow_non_streaming(pooled), False)

        # timing
        idle = time_job_flow_idle(pooled, self.now)
        self.assertEqual(idle, timedelta(minutes=55))
        to_hour = est_time_to_hour(pooled, self.now)
        self.assertEqual(to_hour, timedelta(minutes=5))

        # pool membership
        expected_pool = ('0123456789abcdef0123456789abcdef', 'reflecting')
        self.assertEqual(pool_hash_and_name(pooled), expected_pool)
 def assertJobFlowIs(self, jf,
                     bootstrapping=False,
                     done=False,
                     from_end_of_hour=timedelta(hours=1),
                     has_pending_steps=False,
                     idle_for=timedelta(0),
                     pool_hash=None,
                     pool_name=None,
                     running=False,
                     streaming=True):
     """Assert that the status helpers report the expected values for *jf*.

     Each keyword argument is the expected result of one helper; the
     defaults are used for any that are not passed. Checks run in the
     order below and stop at the first mismatch.
     """
     actual_bootstrapping = is_job_flow_bootstrapping(jf)
     self.assertEqual(bootstrapping, actual_bootstrapping)

     actual_done = is_job_flow_done(jf)
     self.assertEqual(done, actual_done)

     actual_to_hour = est_time_to_hour(jf, self.now)
     self.assertEqual(from_end_of_hour, actual_to_hour)

     actual_pending = job_flow_has_pending_steps(jf)
     self.assertEqual(has_pending_steps, actual_pending)

     # uses the test case's own time_job_flow_idle() method
     actual_idle = self.time_job_flow_idle(jf)
     self.assertEqual(idle_for, actual_idle)

     expected_pool = (pool_hash, pool_name)
     self.assertEqual(expected_pool, pool_hash_and_name(jf))

     actual_running = is_job_flow_running(jf)
     self.assertEqual(running, actual_running)

     actual_streaming = is_job_flow_streaming(jf)
     self.assertEqual(streaming, actual_streaming)
def inspect_and_maybe_terminate_job_flows(
    conf_path=None,
    dry_run=False,
    max_hours_idle=None,
    mins_to_end_of_hour=None,
    now=None,
    pool_name=None,
    pooled_only=False,
    unpooled_only=False,
    max_mins_locked=None,
    quiet=False,
    **kwargs
):
    """Classify every job flow, pick the idle/pending ones that meet the
    given criteria, and pass them to ``terminate_and_notify()``.

    :param conf_path: passed to ``EMRJobRunner``.
    :param dry_run: passed through to ``terminate_and_notify()``.
    :param max_hours_idle: if set, job flows that have been idle for this
        many hours or less are left alone. If both this and
        *mins_to_end_of_hour* are ``None``, it defaults to
        ``DEFAULT_MAX_HOURS_IDLE``.
    :param mins_to_end_of_hour: if set, job flows with pending steps, or
        with at least this many minutes left before the estimated end of
        the hour, are left alone.
    :param now: time to measure idleness against; defaults to
        ``datetime.utcnow()``.
    :param pool_name: if set, only job flows in the pool with this name
        are candidates.
    :param pooled_only: if true, job flows not in a pool are left alone.
    :param unpooled_only: if true, job flows in a pool are left alone.
    :param max_mins_locked: passed through to ``terminate_and_notify()``.
    :param quiet: passed through to ``terminate_and_notify()``.
    :param kwargs: extra keyword arguments for ``EMRJobRunner``.
    """

    if now is None:
        now = datetime.utcnow()

    # old default behavior
    if max_hours_idle is None and mins_to_end_of_hour is None:
        max_hours_idle = DEFAULT_MAX_HOURS_IDLE

    runner = EMRJobRunner(conf_path=conf_path, **kwargs)
    emr_conn = runner.make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 months)')
    # We don't filter by job flow state because we want this to work even
    # if Amazon adds another kind of idle state.
    job_flows = describe_all_job_flows(emr_conn)

    num_bootstrapping = 0
    num_done = 0
    num_idle = 0
    num_non_streaming = 0
    num_pending = 0
    num_running = 0

    # a list of tuples of (job flow, has pending steps, idle time,
    # time to end of hour); both times are timedeltas
    to_terminate = []

    for jf in job_flows:

        # check if job flow is done
        if is_job_flow_done(jf):
            num_done += 1

        # check if job flow is bootstrapping
        elif is_job_flow_bootstrapping(jf):
            num_bootstrapping += 1

        # we can't really tell if non-streaming jobs are idle or not, so
        # let them be (see Issue #60)
        elif not is_job_flow_streaming(jf):
            num_non_streaming += 1

        elif is_job_flow_running(jf):
            num_running += 1

        else:
            time_idle = now - time_last_active(jf)
            time_to_end_of_hour = est_time_to_hour(jf, now=now)
            _, pool = pool_hash_and_name(jf)
            pending = job_flow_has_pending_steps(jf)

            if pending:
                num_pending += 1
            else:
                num_idle += 1

            log.debug(
                'Job flow %s %s for %s, %s to end of hour, %s (%s)' %
                      (jf.jobflowid,
                       'pending' if pending else 'idle',
                       strip_microseconds(time_idle),
                       strip_microseconds(time_to_end_of_hour),
                       ('unpooled' if pool is None else 'in %s pool' % pool),
                       jf.name))

            # filter out job flows that don't meet our criteria
            if (max_hours_idle is not None and
                time_idle <= timedelta(hours=max_hours_idle)):
                continue

            # mins_to_end_of_hour doesn't apply to jobs with pending steps
            if (mins_to_end_of_hour is not None and
                (pending or
                 time_to_end_of_hour >= timedelta(
                    minutes=mins_to_end_of_hour))):
                continue

            if (pooled_only and pool is None):
                continue

            if (unpooled_only and pool is not None):
                continue

            if (pool_name is not None and pool != pool_name):
                continue

            to_terminate.append((jf, pending, time_idle, time_to_end_of_hour))

    # the argument order must match the labels in the format string
    log.info(
        'Job flow statuses: %d bootstrapping, %d running, %d pending, %d idle,'
        ' %d active non-streaming, %d done' % (
        num_bootstrapping, num_running, num_pending, num_idle,
        num_non_streaming, num_done))

    terminate_and_notify(runner, to_terminate, dry_run=dry_run,
                         max_mins_locked=max_mins_locked, quiet=quiet)
Пример #14
0
def inspect_and_maybe_terminate_job_flows(conf_paths=None,
                                          dry_run=False,
                                          max_hours_idle=None,
                                          mins_to_end_of_hour=None,
                                          now=None,
                                          pool_name=None,
                                          pooled_only=False,
                                          unpooled_only=False,
                                          max_mins_locked=None,
                                          quiet=False,
                                          **kwargs):
    """Classify every job flow, pick the idle/pending ones that meet the
    given criteria, and pass them to ``terminate_and_notify()``.

    :param conf_paths: passed to ``EMRJobRunner``.
    :param dry_run: passed through to ``terminate_and_notify()``.
    :param max_hours_idle: if set, job flows that have been idle for this
        many hours or less are left alone. If both this and
        *mins_to_end_of_hour* are ``None``, it defaults to
        ``DEFAULT_MAX_HOURS_IDLE``.
    :param mins_to_end_of_hour: if set, job flows with pending steps, or
        with at least this many minutes left before the estimated end of
        the hour, are left alone.
    :param now: time to measure idleness against; defaults to
        ``datetime.utcnow()``.
    :param pool_name: if set, only job flows in the pool with this name
        are candidates.
    :param pooled_only: if true, job flows not in a pool are left alone.
    :param unpooled_only: if true, job flows in a pool are left alone.
    :param max_mins_locked: passed through to ``terminate_and_notify()``.
    :param quiet: passed through to ``terminate_and_notify()``.
    :param kwargs: extra keyword arguments for ``EMRJobRunner``.
    """

    if now is None:
        now = datetime.utcnow()

    # old default behavior
    if max_hours_idle is None and mins_to_end_of_hour is None:
        max_hours_idle = DEFAULT_MAX_HOURS_IDLE

    runner = EMRJobRunner(conf_paths=conf_paths, **kwargs)
    emr_conn = runner.make_emr_conn()

    log.info(
        'getting info about all job flows (this goes back about 2 months)')
    # We don't filter by job flow state because we want this to work even
    # if Amazon adds another kind of idle state.
    job_flows = describe_all_job_flows(emr_conn)

    num_bootstrapping = 0
    num_done = 0
    num_idle = 0
    num_non_streaming = 0
    num_pending = 0
    num_running = 0

    # a list of tuples of (job flow, has pending steps, idle time,
    # time to end of hour); both times are timedeltas
    to_terminate = []

    for jf in job_flows:

        # check if job flow is done
        if is_job_flow_done(jf):
            num_done += 1

        # check if job flow is bootstrapping
        elif is_job_flow_bootstrapping(jf):
            num_bootstrapping += 1

        # we can't really tell if non-streaming jobs are idle or not, so
        # let them be (see Issue #60)
        elif not is_job_flow_streaming(jf):
            num_non_streaming += 1

        elif is_job_flow_running(jf):
            num_running += 1

        else:
            time_idle = now - time_last_active(jf)
            time_to_end_of_hour = est_time_to_hour(jf, now=now)
            _, pool = pool_hash_and_name(jf)
            pending = job_flow_has_pending_steps(jf)

            if pending:
                num_pending += 1
            else:
                num_idle += 1

            log.debug('Job flow %s %s for %s, %s to end of hour, %s (%s)' %
                      (jf.jobflowid, 'pending' if pending else 'idle',
                       strip_microseconds(time_idle),
                       strip_microseconds(time_to_end_of_hour),
                       ('unpooled' if pool is None else 'in %s pool' % pool),
                       jf.name))

            # filter out job flows that don't meet our criteria
            if (max_hours_idle is not None
                    and time_idle <= timedelta(hours=max_hours_idle)):
                continue

            # mins_to_end_of_hour doesn't apply to jobs with pending steps
            if (mins_to_end_of_hour is not None
                    and (pending or time_to_end_of_hour >=
                         timedelta(minutes=mins_to_end_of_hour))):
                continue

            if (pooled_only and pool is None):
                continue

            if (unpooled_only and pool is not None):
                continue

            if (pool_name is not None and pool != pool_name):
                continue

            to_terminate.append((jf, pending, time_idle, time_to_end_of_hour))

    # the argument order must match the labels in the format string
    log.info(
        'Job flow statuses: %d bootstrapping, %d running, %d pending, %d idle,'
        ' %d active non-streaming, %d done' %
        (num_bootstrapping, num_running, num_pending, num_idle,
         num_non_streaming, num_done))

    terminate_and_notify(runner,
                         to_terminate,
                         dry_run=dry_run,
                         max_mins_locked=max_mins_locked,
                         quiet=quiet)
Пример #15
0
 def test_empty(self):
     """A mock job flow with no attributes set should report a full hour
     remaining."""
     remaining = est_time_to_hour(MockEmrObject())
     self.assertEqual(remaining, timedelta(hours=1))
Пример #16
0
 def test_empty(self):
     """est_time_to_hour() on an attribute-less mock job flow should
     return one hour."""
     blank_flow = MockEmrObject()
     time_left = est_time_to_hour(blank_flow)

     self.assertEqual(time_left, timedelta(hours=1))