def create_fake_job_flows(self):
        self.now = datetime.utcnow().replace(microsecond=0)

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            state='WAITING',
        )

        # Helper to build a mock step object; its .args attribute holds
        # MockEmrObject(value=...) entries so tests can read step.args[i].value
        def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
                 args=[
                     '-mapper', 'my_job.py --mapper', '-reducer',
                     'my_job.py --reducer'
                 ],
                 state='COMPLETE',
                 start_time_back=None,
                 end_time_back=None,
                 name='Streaming Step',
                 action_on_failure='TERMINATE_JOB_FLOW',
                 **kwargs):
            if start_time_back:
                kwargs['startdatetime'] = to_iso8601(self.now - timedelta(
                    hours=start_time_back))
            if end_time_back:
                kwargs['enddatetime'] = to_iso8601(self.now - timedelta(
                    hours=end_time_back))
            kwargs['args'] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(jar=jar,
                                 state=state,
                                 name=name,
                                 action_on_failure=action_on_failure,
                                 **kwargs)

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now -
                                     timedelta(hours=4, minutes=15)),
            state='RUNNING',
            steps=[step(start_time_back=4, state='RUNNING')],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            enddatetime=to_iso8601(self.now - timedelta(hours=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9)),
            state='COMPLETE',
            steps=[step(start_time_back=8, end_time_back=6)],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_time_back=4, end_time_back=2)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[
                step(
                    start_time_back=4,
                    end_time_back=4,
                    jar=('s3://us-east-1.elasticmapreduce/libs/script-runner/'
                         'script-runner.jar'),
                    args=[],
                )
            ],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows['j-CUSTOM_DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[
                step(
                    start_time_back=4,
                    end_time_back=4,
                    jar=('s3://my_bucket/tmp/somejob/files/'
                         'oddjob-0.0.3-SNAPSHOT-standalone.jar'),
                    args=[],
                )
            ],
        )

        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(name='j-DEBUG_ONLY',
                                           log_uri='',
                                           enable_debugging=True)
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-DEBUG_ONLY'] = jf
        jf.state = 'WAITING'
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=2))
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(name='j-HADOOP_DEBUGGING',
                                           log_uri='',
                                           enable_debugging=True,
                                           steps=[step()])
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = jf
        jf.state = 'WAITING'
        jf.creationdatetime = to_iso8601(self.now - timedelta(hours=6))
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=5))
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.steps[1].startdatetime = to_iso8601(self.now - timedelta(hours=4))
        jf.steps[1].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[
                step(start_time_back=4, end_time_back=3, state='FAILED'),
                step(state='CANCELLED'),
            ],
        )

        # pooled job flow approaching the end of its first full hour
        self.mock_emr_job_flows['j-POOLED'] = MockEmrObject(
            bootstrapactions=[
                MockEmrObject(args=[]),
                MockEmrObject(args=[
                    MockEmrObject(
                        value='pool-0123456789abcdef0123456789abcdef'),
                    MockEmrObject(value='reflecting'),
                ]),
            ],
            creationdatetime=to_iso8601(self.now - timedelta(hours=1)),
            startdatetime=to_iso8601(self.now - timedelta(minutes=55)),
            state='WAITING',
            steps=[],
        )

        # add job flow IDs and fake names to the mock job flows
        for jfid, jf in self.mock_emr_job_flows.iteritems():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'
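
# A minimal sketch, not part of the original fixtures: the mock flows above
# are shaped so a test can ask "how long has this flow been idle?" -- idle
# since its last step's enddatetime, or since it started/was created if it
# has no finished steps. The helper name and the explicit strptime format
# (assumed to match what to_iso8601() emits) are illustrative assumptions.
from datetime import datetime

ISO8601 = '%Y-%m-%dT%H:%M:%SZ'  # assumed format produced by to_iso8601()

def hours_idle(jf, now):
    """Hours since a mock job flow last made progress (0.0 if still busy)."""
    steps = getattr(jf, 'steps', None) or []
    if steps:
        end = getattr(steps[-1], 'enddatetime', None)
        if end is None:
            return 0.0  # the last step is still running
        last_active = datetime.strptime(end, ISO8601)
    else:
        started = getattr(jf, 'startdatetime', None) or jf.creationdatetime
        last_active = datetime.strptime(started, ISO8601)
    return (now - last_active).total_seconds() / 3600.0
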
    def create_fake_job_flows(self):
        self.now = datetime.utcnow().replace(microsecond=0)

        # Helper to build a mock step object; its .args attribute holds
        # MockEmrObject(value=...) entries so tests can read step.args[i].value
        def step(
            jar="/home/hadoop/contrib/streaming/hadoop-streaming.jar",
            args=["-mapper", "my_job.py --mapper", "-reducer", "my_job.py --reducer"],
            state="COMPLETE",
            create_hours_ago=None,
            start_hours_ago=None,
            end_hours_ago=None,
            name="Streaming Step",
            action_on_failure="TERMINATE_JOB_FLOW",
            **kwargs
        ):
            if create_hours_ago:
                kwargs["creationdatetime"] = to_iso8601(self.now - timedelta(hours=create_hours_ago))
            if start_hours_ago:
                kwargs["startdatetime"] = to_iso8601(self.now - timedelta(hours=start_hours_ago))
            if end_hours_ago:
                kwargs["enddatetime"] = to_iso8601(self.now - timedelta(hours=end_hours_ago))
            kwargs["args"] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(jar=jar, state=state, name=name, action_on_failure=action_on_failure, **kwargs)

        # empty job
        self.mock_emr_job_flows["j-EMPTY"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)), state="STARTING"
        )

        # job that's bootstrapping
        self.mock_emr_job_flows["j-BOOTSTRAPPING"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9, minutes=55)),
            state="BOOTSTRAPPING",
            steps=[step(create_hours_ago=10, state="PENDING")],
        )

        # currently running job
        self.mock_emr_job_flows["j-CURRENTLY_RUNNING"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=4, minutes=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=4, minutes=15)),
            state="RUNNING",
            steps=[step(start_hours_ago=4, state="RUNNING")],
        )

        # finished job flow
        self.mock_emr_job_flows["j-DONE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            enddatetime=to_iso8601(self.now - timedelta(hours=5)),
            readydatetime=to_iso8601(self.now - timedelta(hours=8)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9)),
            state="COMPLETE",
            steps=[step(start_hours_ago=8, end_hours_ago=6)],
        )

        # idle job flow
        self.mock_emr_job_flows["j-DONE_AND_IDLE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state="WAITING",
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows["j-HIVE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state="WAITING",
            steps=[
                step(
                    start_hours_ago=4,
                    end_hours_ago=4,
                    jar=("s3://us-east-1.elasticmapreduce/libs/script-runner/" "script-runner.jar"),
                    args=[],
                )
            ],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows["j-CUSTOM_DONE_AND_IDLE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state="WAITING",
            steps=[
                step(
                    start_hours_ago=4,
                    end_hours_ago=4,
                    jar=("s3://my_bucket/tmp/somejob/files/" "oddjob-0.0.3-SNAPSHOT-standalone.jar"),
                    args=[],
                )
            ],
        )

        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(
            name="j-DEBUG_ONLY", log_uri="", enable_debugging=True, now=self.now - timedelta(hours=3, minutes=5)
        )
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows["j-DEBUG_ONLY"] = jf
        jf.state = "WAITING"
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=3))
        jf.readydatetime = to_iso8601(self.now - timedelta(hours=2, minutes=55))
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(
            name="j-HADOOP_DEBUGGING",
            log_uri="",
            enable_debugging=True,
            steps=[step()],
            now=self.now - timedelta(hours=6),
        )
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows["j-HADOOP_DEBUGGING"] = jf
        jf.state = "WAITING"
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.readydatetime = to_iso8601(self.now - timedelta(hours=4, minutes=55))
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.steps[1].startdatetime = to_iso8601(self.now - timedelta(hours=4))
        jf.steps[1].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # should skip cancelled steps
        self.mock_emr_job_flows["j-IDLE_AND_FAILED"] = MockEmrObject(
            state="WAITING",
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_hours_ago=4, end_hours_ago=3, state="FAILED"), step(state="CANCELLED")],
        )

        # pooled job flow approaching the end of its first full hour
        self.mock_emr_job_flows["j-POOLED"] = MockEmrObject(
            bootstrapactions=[
                MockEmrObject(args=[]),
                MockEmrObject(
                    args=[
                        MockEmrObject(value="pool-0123456789abcdef0123456789abcdef"),
                        MockEmrObject(value="reflecting"),
                    ]
                ),
            ],
            creationdatetime=to_iso8601(self.now - timedelta(hours=1)),
            readydatetime=to_iso8601(self.now - timedelta(minutes=50)),
            startdatetime=to_iso8601(self.now - timedelta(minutes=55)),
            state="WAITING",
            steps=[],
        )

        # job flow that has a pending step but hasn't started running it
        self.mock_emr_job_flows["j-PENDING_BUT_IDLE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=3)),
            readydatetime=to_iso8601(self.now - timedelta(hours=2, minutes=50)),
            startdatetime=to_iso8601(self.now - timedelta(hours=2, minutes=55)),
            state="RUNNING",
            steps=[step(create_hours_ago=3, state="PENDING")],
        )

        # add job flow IDs and fake names to the mock job flows
        for jfid, jf in self.mock_emr_job_flows.iteritems():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace("_", " ").title() + " Job Flow"
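
# A minimal sketch, assuming only what the j-POOLED fixture above shows:
# the pool hash and pool name ride along as the two args of one of the
# flow's bootstrap actions ('pool-<hash>' followed by the name). The helper
# name is illustrative; it is not mrjob's own pooling code.
def pool_hash_and_name(jf):
    """Return the ('pool-<hash>', name) pair from a mock job flow's
    bootstrap actions, or (None, None) if the flow isn't pooled."""
    for action in getattr(jf, 'bootstrapactions', None) or []:
        args = [a.value for a in getattr(action, 'args', [])]
        if len(args) == 2 and args[0].startswith('pool-'):
            return args[0], args[1]
    return None, None
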
    def create_fake_job_flows(self):
        self.now = datetime.utcnow().replace(microsecond=0)
        self.add_mock_s3_data({'my_bucket': {}})

        # Helper to build a mock step object; its .args attribute holds
        # MockEmrObject(value=...) entries so tests can read step.args[i].value
        def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
                 args=['-mapper', 'my_job.py --mapper',
                       '-reducer', 'my_job.py --reducer'],
                 state='COMPLETE',
                 create_hours_ago=None,
                 start_hours_ago=None,
                 end_hours_ago=None,
                 name='Streaming Step',
                 action_on_failure='TERMINATE_CLUSTER',
                 **kwargs):
            if create_hours_ago:
                kwargs['creationdatetime'] = to_iso8601(
                    self.now - timedelta(hours=create_hours_ago))
            if start_hours_ago:
                kwargs['startdatetime'] = to_iso8601(
                    self.now - timedelta(hours=start_hours_ago))
            if end_hours_ago:
                kwargs['enddatetime'] = to_iso8601(
                    self.now - timedelta(hours=end_hours_ago))
            kwargs['args'] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(
                jar=jar, state=state, name=name,
                action_on_failure=action_on_failure, **kwargs)

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            state='STARTING',
            steps=[],
        )

        # job that's bootstrapping
        self.mock_emr_job_flows['j-BOOTSTRAPPING'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            startdatetime=to_iso8601(
                self.now - timedelta(hours=9, minutes=55)),
            state='BOOTSTRAPPING',
            steps=[step(create_hours_ago=10, state='PENDING')],
        )

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=4,
                                                          minutes=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=4,
                                                          minutes=15)),
            state='RUNNING',
            steps=[step(start_hours_ago=4, state='RUNNING')],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            enddatetime=to_iso8601(self.now - timedelta(hours=5)),
            readydatetime=to_iso8601(self.now - timedelta(hours=8)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9)),
            state='COMPLETE',
            steps=[step(start_hours_ago=8, end_hours_ago=6)],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )

        # idle job flow with an active lock
        self.mock_emr_job_flows['j-IDLE_AND_LOCKED'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )
        self.add_mock_s3_data({
            'my_bucket': {
                'locks/j-IDLE_AND_LOCKED/2': b'not_you',
            },
        }, time_modified=datetime.utcnow())

        # idle job flow with an expired lock
        self.mock_emr_job_flows['j-IDLE_AND_EXPIRED'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )
        self.add_mock_s3_data({
            'my_bucket': {
                'locks/j-IDLE_AND_EXPIRED/2': b'not_you',
            },
        }, time_modified=datetime.utcnow() - timedelta(minutes=5))

        # idle job flow whose last step has no enddatetime (never finished)
        self.mock_emr_job_flows['j-IDLE_BUT_INCOMPLETE_STEPS'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=None)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(
                start_hours_ago=4,
                end_hours_ago=4,
                jar=('s3://us-east-1.elasticmapreduce/libs/script-runner/'
                     'script-runner.jar'),
                args=[],
            )],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows['j-CUSTOM_DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(
                start_hours_ago=4,
                end_hours_ago=4,
                jar=('s3://my_bucket/tmp/somejob/files/'
                     'oddjob-0.0.3-SNAPSHOT-standalone.jar'),
                args=[],
            )],
        )

        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(name='j-DEBUG_ONLY',
                                           log_uri='',
                                           enable_debugging=True,
                                           now=self.now -
                                               timedelta(hours=3, minutes=5))
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-DEBUG_ONLY'] = jf
        jf.state = 'WAITING'
        jf.startdatetime = to_iso8601(
            self.now - timedelta(hours=3))
        jf.readydatetime = to_iso8601(
            self.now - timedelta(hours=2, minutes=55))
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(name='j-HADOOP_DEBUGGING',
                                           log_uri='',
                                           enable_debugging=True,
                                           now=self.now -
                                           timedelta(hours=6))
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = jf
        jf.steps.append(step())
        jf.state = 'WAITING'
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.readydatetime = to_iso8601(
            self.now - timedelta(hours=4, minutes=55))
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.steps[1].startdatetime = to_iso8601(self.now - timedelta(hours=4))
        jf.steps[1].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # should skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[
                step(start_hours_ago=4, end_hours_ago=3, state='FAILED'),
                step(state='CANCELLED'),
            ],
        )

        # pooled job flow approaching the end of its first full hour
        self.mock_emr_job_flows['j-POOLED'] = MockEmrObject(
            bootstrapactions=[
                MockEmrObject(args=[], name='action 0'),
                MockEmrObject(args=[
                    MockEmrObject(
                        value='pool-0123456789abcdef0123456789abcdef'),
                    MockEmrObject(value='reflecting'),
                ], name='master'),
            ],
            creationdatetime=to_iso8601(self.now - timedelta(hours=1)),
            readydatetime=to_iso8601(self.now - timedelta(minutes=50)),
            startdatetime=to_iso8601(self.now - timedelta(minutes=55)),
            state='WAITING',
            steps=[],
        )

        # job flow that has a pending step but hasn't started running it
        self.mock_emr_job_flows['j-PENDING_BUT_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=3)),
            readydatetime=to_iso8601(
                self.now - timedelta(hours=2, minutes=50)),
            startdatetime=to_iso8601(
                self.now - timedelta(hours=2, minutes=55)),
            state='RUNNING',
            steps=[step(create_hours_ago=3, state='PENDING')],
        )

        # add job flow IDs and fake names to the mock job flows
        for jfid, jf in self.mock_emr_job_flows.items():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'
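
# A minimal sketch, inferred from the j-IDLE_AND_LOCKED / j-IDLE_AND_EXPIRED
# fixtures above: a lock lives at 'locks/<jobflowid>/<step_num>' in the
# bucket, and it should only be respected while its last-modified time is
# recent. The one-minute window below is a placeholder assumption; the
# fixtures only contrast "modified just now" with "modified 5 minutes ago".
from datetime import timedelta

def lock_is_active(key_time_modified, now, expire_after=timedelta(minutes=1)):
    """True if a lock key's last-modified time is recent enough to honor."""
    return (key_time_modified is not None and
            now - key_time_modified < expire_after)
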
    def create_fake_job_flows(self):
        self.now = datetime.utcnow().replace(microsecond=0)

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            steps=[],
        )

        # Helper to build a mock step object; its .args attribute holds
        # MockEmrObject(value=...) entries so tests can read step.args[i].value
        def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
                 args=['-mapper', 'my_job.py --mapper',
                       '-reducer', 'my_job.py --reducer'],
                 state='COMPLETE',
                 start_time_back=None,
                 end_time_back=None,
                 name='Streaming Step',
                 action_on_failure='TERMINATE_JOB_FLOW',
                 **kwargs):
            if start_time_back:
                kwargs['startdatetime'] = to_iso8601(
                    self.now - timedelta(hours=start_time_back))
            if end_time_back:
                kwargs['enddatetime'] = to_iso8601(
                    self.now - timedelta(hours=end_time_back))
            kwargs['args'] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(
                jar=jar, state=state, name=name,
                action_on_failure=action_on_failure, **kwargs)

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            state='RUNNING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_time_back=4, state='RUNNING')],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            state='COMPLETE',
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9)),
            enddatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_time_back=8, end_time_back=6)],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_time_back=4, end_time_back=2)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(
                start_time_back=4,
                end_time_back=4,
                jar='s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar',
                args=[],
            )],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows['j-CUSTOM_DONE_AND_IDLE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(
                start_time_back=4,
                end_time_back=4,
                jar='s3://my_bucket/tmp/somejob/files/oddjob-0.0.3-SNAPSHOT-standalone.jar',
                args=[],
            )],
        )

        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(name='j-DEBUG_ONLY',
                                           log_uri='',
                                           enable_debugging=True)
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-DEBUG_ONLY'] = jf
        jf.state = 'WAITING'
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=2))
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(name='j-HADOOP_DEBUGGING',
                                           log_uri='',
                                           enable_debugging=True,
                                           steps=[step()])
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = jf
        jf.state = 'WAITING'
        jf.creationdatetime = to_iso8601(self.now - timedelta(hours=6))
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=5))
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.steps[1].startdatetime = to_iso8601(self.now - timedelta(hours=4))
        jf.steps[1].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[
                step(start_time_back=4, end_time_back=3, state='FAILED'),
                step(state='CANCELLED'),
            ],
        )

        # add job flow IDs and fake names to the mock job flows
        for jfid, jf in self.mock_emr_job_flows.iteritems():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'
def mock_botoemr_EmrConnection(*args, **kwargs):
    # factory that injects the test case's shared mock state (S3 filesystem,
    # job flows, simulated failures and canned output) into every EMR
    # connection; it closes over self, so it belongs inside a test method
    kwargs['mock_s3_fs'] = self.mock_s3_fs
    kwargs['mock_emr_job_flows'] = self.mock_emr_job_flows
    kwargs['mock_emr_failures'] = self.mock_emr_failures
    kwargs['mock_emr_output'] = self.mock_emr_output
    return MockEmrConnection(*args, **kwargs)
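
# A hedged sketch of how a test harness might install the factory above so
# code under test talks to the mocks: boto 2 exposes the real class at
# boto.emr.connection.EmrConnection, but the helper name and the
# addCleanup-based restore below are assumptions, not code from the original.
import boto.emr.connection

def install_mock_emr_connection(test_case, factory):
    """Monkey-patch boto 2's EmrConnection with `factory` for one test."""
    real = boto.emr.connection.EmrConnection
    boto.emr.connection.EmrConnection = factory
    test_case.addCleanup(
        setattr, boto.emr.connection, 'EmrConnection', real)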