def test_spark_jar_step_without_mr_job_script(self):
    """A Spark JAR step can run from its step descriptions alone,
    with no mr_job_script attached to the runner."""
    jar = self.makefile('fireflies.jar')
    step_descs = MRSparkJar(['--jar', jar])._steps_desc()
    runner = EMRJobRunner(steps=step_descs, stdin=BytesIO())
    runner.run()
    runner.cleanup()
def test_spark_script_step_without_mr_job_script(self):
    """A Spark script step can run from its step descriptions alone,
    with no mr_job_script attached to the runner."""
    script = self.makefile('a_spark_script.py')
    step_descs = MRSparkScript(['--script', script])._steps_desc()
    runner = EMRJobRunner(steps=step_descs, stdin=BytesIO())
    runner.run()
    runner.cleanup()
def test_jar_step_without_mr_job_script(self):
    """A plain JAR step can run from its step descriptions alone,
    with no mr_job_script attached to the runner."""
    jar = self.makefile('dora.jar')
    step_descs = MRJustAJar(['--jar', jar])._steps_desc()
    # This variant feeds actual bytes on stdin, unlike the Spark cases.
    runner = EMRJobRunner(steps=step_descs, stdin=BytesIO(b'backpack'))
    runner.run()
    runner.cleanup()
def main():
    """Build runner config, start a persistent EMR cluster, and run the
    Hive step job on it."""
    runner_kwargs = build_config()
    logger.info('main() -- runner_kwargs: %s', runner_kwargs)

    # First runner exists only to create the persistent cluster.
    cluster_runner = EMRJobRunner(**runner_kwargs)
    cluster_id = cluster_runner.make_persistent_cluster()
    logger.info('Cluster-id: %s', cluster_id)

    # Second runner attaches to that cluster and executes the Hive step
    # script; presumably '--jar-region' is consumed by the job itself —
    # can't confirm from here.
    step_runner = EMRJobRunner(
        mr_job_script='src/hive_step.py',
        cluster_id=cluster_id,
        input_paths=['/dev/null'],
        extra_args=['--jar-region', runner_kwargs['region']],
        **runner_kwargs)
    step_runner.run()