def execute(self, context: 'Context') -> int:
    emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
    emr = emr_hook.get_conn()

    if self.do_xcom_push:
        context['ti'].xcom_push(key='cluster_id', value=self.cluster_id)

    # Register a console link to the cluster on the task instance.
    EmrClusterLink.persist(
        context=context,
        operator=self,
        region_name=emr_hook.conn_region_name,
        aws_partition=emr_hook.conn_partition,
        job_flow_id=self.cluster_id,
    )

    self.log.info('Modifying cluster %s', self.cluster_id)
    response = emr.modify_cluster(
        ClusterId=self.cluster_id, StepConcurrencyLevel=self.step_concurrency_level
    )

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise AirflowException(f'Modify cluster failed: {response}')
    else:
        self.log.info('Steps concurrency level %d', response['StepConcurrencyLevel'])
        return response['StepConcurrencyLevel']
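
# A minimal usage sketch (illustrative assumptions, not part of this module):
# wiring EmrModifyClusterOperator into a DAG to raise a running cluster's step
# concurrency. The task_id, the upstream task name in the XCom template, and
# the concurrency value are all hypothetical.
#
#     from airflow.providers.amazon.aws.operators.emr import EmrModifyClusterOperator
#
#     modify_cluster = EmrModifyClusterOperator(
#         task_id="modify_cluster",  # hypothetical task id
#         cluster_id="{{ task_instance.xcom_pull(task_ids='create_job_flow') }}",
#         step_concurrency_level=5,  # allow up to five steps to run in parallel
#     )
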
def execute(self, context: 'Context') -> str:
    emr = EmrHook(
        aws_conn_id=self.aws_conn_id, emr_conn_id=self.emr_conn_id, region_name=self.region_name
    )

    self.log.info(
        'Creating JobFlow using aws-conn-id: %s, emr-conn-id: %s', self.aws_conn_id, self.emr_conn_id
    )

    # The overrides may arrive as a string (e.g. rendered from a template or
    # XCom); parse them back into a dict before calling the hook.
    if isinstance(self.job_flow_overrides, str):
        job_flow_overrides: Dict[str, Any] = ast.literal_eval(self.job_flow_overrides)
        self.job_flow_overrides = job_flow_overrides
    else:
        job_flow_overrides = self.job_flow_overrides

    response = emr.create_job_flow(job_flow_overrides)

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise AirflowException(f'JobFlow creation failed: {response}')
    else:
        job_flow_id = response['JobFlowId']
        self.log.info('JobFlow with id %s created', job_flow_id)
        EmrClusterLink.persist(
            context=context,
            operator=self,
            region_name=emr.conn_region_name,
            aws_partition=emr.conn_partition,
            job_flow_id=job_flow_id,
        )
        return job_flow_id
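
# A minimal usage sketch (illustrative assumptions, not values from this
# module): creating a cluster from a DAG. The overrides shown are a small
# hypothetical subset of the boto3 run_job_flow parameters.
#
#     from airflow.providers.amazon.aws.operators.emr import EmrCreateJobFlowOperator
#
#     create_job_flow = EmrCreateJobFlowOperator(
#         task_id="create_job_flow",  # hypothetical task id
#         job_flow_overrides={
#             "Name": "data-pipeline",      # hypothetical cluster name
#             "ReleaseLabel": "emr-6.7.0",  # hypothetical EMR release
#             "Instances": {"KeepJobFlowAliveWhenNoSteps": True},
#         },
#         aws_conn_id="aws_default",
#         emr_conn_id="emr_default",
#     )
#
# The returned JobFlowId becomes the task's XCom return value, so downstream
# tasks can reference it via xcom_pull.
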
def execute(self, context: 'Context') -> None:
    emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
    emr = emr_hook.get_conn()

    # Register a console link to the cluster on the task instance.
    EmrClusterLink.persist(
        context=context,
        operator=self,
        region_name=emr_hook.conn_region_name,
        aws_partition=emr_hook.conn_partition,
        job_flow_id=self.job_flow_id,
    )

    self.log.info('Terminating JobFlow %s', self.job_flow_id)
    response = emr.terminate_job_flows(JobFlowIds=[self.job_flow_id])

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise AirflowException(f'JobFlow termination failed: {response}')
    else:
        self.log.info('JobFlow with id %s terminated', self.job_flow_id)
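
# A minimal usage sketch (the task_id and upstream task name are illustrative
# assumptions): terminating the cluster created earlier in the same DAG by
# pulling its JobFlowId from XCom.
#
#     from airflow.providers.amazon.aws.operators.emr import EmrTerminateJobFlowOperator
#
#     terminate_cluster = EmrTerminateJobFlowOperator(
#         task_id="terminate_cluster",  # hypothetical task id
#         job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow') }}",
#         trigger_rule="all_done",  # tear the cluster down even if upstream tasks failed
#     )
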
def execute(self, context: 'Context') -> List[str]:
    emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
    emr = emr_hook.get_conn()

    # Resolve the target cluster: use the explicit id if given, otherwise
    # look one up by name among clusters in the accepted states.
    job_flow_id = self.job_flow_id or emr_hook.get_cluster_id_by_name(
        str(self.job_flow_name), self.cluster_states
    )

    if not job_flow_id:
        raise AirflowException(f'No cluster found for name: {self.job_flow_name}')

    if self.do_xcom_push:
        context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

    EmrClusterLink.persist(
        context=context,
        operator=self,
        region_name=emr_hook.conn_region_name,
        aws_partition=emr_hook.conn_partition,
        job_flow_id=job_flow_id,
    )

    self.log.info('Adding steps to %s', job_flow_id)

    # steps may arrive as a string representing a list,
    # e.g. if we used XCom or a file then: steps="[{ step1 }, { step2 }]"
    steps = self.steps
    if isinstance(steps, str):
        steps = ast.literal_eval(steps)

    response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=steps)

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise AirflowException(f'Adding steps failed: {response}')
    else:
        self.log.info('Steps %s added to JobFlow', response['StepIds'])
        return response['StepIds']
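
# A minimal usage sketch (illustrative assumptions): submitting a single step
# to an existing cluster. The step name, jar, and args are hypothetical; the
# Steps structure follows the boto3 EMR add_job_flow_steps shape.
#
#     from airflow.providers.amazon.aws.operators.emr import EmrAddStepsOperator
#
#     add_steps = EmrAddStepsOperator(
#         task_id="add_steps",  # hypothetical task id
#         job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow') }}",
#         steps=[
#             {
#                 "Name": "calculate_pi",  # hypothetical step name
#                 "ActionOnFailure": "CONTINUE",
#                 "HadoopJarStep": {
#                     "Jar": "command-runner.jar",
#                     "Args": ["spark-example", "SparkPi", "10"],
#                 },
#             }
#         ],
#     )
#
# The returned StepIds can be fed to an EmrStepSensor to wait for completion.
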