示例#1
0
    def test_spark_jar_step_without_mr_job_script(self):
        """Run a Spark JAR step on EMR from a step description alone.

        The runner is built from ``steps`` (taken from
        ``MRSparkJar._steps_desc()``) and an empty stdin; no
        ``mr_job_script`` is passed. The test makes no assertions: it
        passes when running and cleaning up complete without raising.
        """
        spark_jar_path = self.makefile('fireflies.jar')
        steps = MRSparkJar(['--jar', spark_jar_path])._steps_desc()

        # Leaving the ``with`` block calls runner.cleanup(), so cleanup is
        # no longer skipped when run() raises.
        with EMRJobRunner(steps=steps, stdin=BytesIO()) as runner:
            runner.run()
示例#2
0
    def test_spark_script_step_without_mr_job_script(self):
        """Run a Spark script step on EMR from a step description alone.

        The runner is built from ``steps`` (taken from
        ``MRSparkScript._steps_desc()``) and an empty stdin; no
        ``mr_job_script`` is passed. The test makes no assertions: it
        passes when running and cleaning up complete without raising.
        """
        spark_script_path = self.makefile('a_spark_script.py')
        steps = MRSparkScript(['--script', spark_script_path])._steps_desc()

        # Leaving the ``with`` block calls runner.cleanup(), so cleanup is
        # no longer skipped when run() raises.
        with EMRJobRunner(steps=steps, stdin=BytesIO()) as runner:
            runner.run()
示例#3
0
    def test_jar_step_without_mr_job_script(self):
        """Run a plain JAR step on EMR from a step description alone.

        The runner is built from ``steps`` (taken from
        ``MRJustAJar._steps_desc()``) and a stdin containing
        ``b'backpack'``; no ``mr_job_script`` is passed. The test makes no
        assertions: it passes when running and cleaning up complete
        without raising.
        """
        jar_path = self.makefile('dora.jar')
        steps = MRJustAJar(['--jar', jar_path])._steps_desc()

        # Leaving the ``with`` block calls runner.cleanup(), so cleanup is
        # no longer skipped when run() raises.
        with EMRJobRunner(steps=steps, stdin=BytesIO(b'backpack')) as runner:
            runner.run()
示例#4
0
    def test_spark_script_step_without_mr_job_script(self):
        """Run a Spark script step on EMR from a step description alone.

        The runner is built from ``steps`` (taken from
        ``MRSparkScript._steps_desc()``) and an empty stdin; no
        ``mr_job_script`` is passed. The test makes no assertions: it
        passes when running and cleaning up complete without raising.
        """
        spark_script_path = self.makefile('a_spark_script.py')
        steps = MRSparkScript(['--script', spark_script_path])._steps_desc()

        # Leaving the ``with`` block calls runner.cleanup(), so cleanup is
        # no longer skipped when run() raises.
        with EMRJobRunner(steps=steps, stdin=BytesIO()) as runner:
            runner.run()
示例#5
0
    def test_spark_jar_step_without_mr_job_script(self):
        """Run a Spark JAR step on EMR from a step description alone.

        The runner is built from ``steps`` (taken from
        ``MRSparkJar._steps_desc()``) and an empty stdin; no
        ``mr_job_script`` is passed. The test makes no assertions: it
        passes when running and cleaning up complete without raising.
        """
        spark_jar_path = self.makefile('fireflies.jar')
        steps = MRSparkJar(['--jar', spark_jar_path])._steps_desc()

        # Leaving the ``with`` block calls runner.cleanup(), so cleanup is
        # no longer skipped when run() raises.
        with EMRJobRunner(steps=steps, stdin=BytesIO()) as runner:
            runner.run()
示例#6
0
    def test_jar_step_without_mr_job_script(self):
        """Run a plain JAR step on EMR from a step description alone.

        The runner is built from ``steps`` (taken from
        ``MRJustAJar._steps_desc()``) and a stdin containing
        ``b'backpack'``; no ``mr_job_script`` is passed. The test makes no
        assertions: it passes when running and cleaning up complete
        without raising.
        """
        jar_path = self.makefile('dora.jar')
        steps = MRJustAJar(['--jar', jar_path])._steps_desc()

        # Leaving the ``with`` block calls runner.cleanup(), so cleanup is
        # no longer skipped when run() raises.
        with EMRJobRunner(steps=steps, stdin=BytesIO(b'backpack')) as runner:
            runner.run()
示例#7
0
def main():
    """Create a persistent EMR cluster, then run the Hive step job on it.

    Runner keyword arguments come from ``build_config()`` and are logged.
    A first runner is used only to call ``make_persistent_cluster()``; the
    returned cluster id is logged and handed to a second runner, which
    runs ``src/hive_step.py`` against that cluster.

    Raises:
        KeyError: if the built configuration has no ``'region'`` entry.
    """
    runner_kwargs = build_config()
    logger.info('main() -- runner_kwargs: %s', runner_kwargs)

    # First runner: its only job here is to bring up the cluster.
    cluster_launcher = EMRJobRunner(**runner_kwargs)
    cluster_id = cluster_launcher.make_persistent_cluster()
    logger.info('Cluster-id: %s', cluster_id)

    # Second runner: targets the cluster created above. '/dev/null' is
    # given as the only input path, and the configured region is passed
    # through to the job script as --jar-region.
    step_runner = EMRJobRunner(
        mr_job_script='src/hive_step.py',
        cluster_id=cluster_id,
        input_paths=['/dev/null'],
        extra_args=['--jar-region', runner_kwargs['region']],
        **runner_kwargs
    )
    step_runner.run()