def test_uri_parsing(self):
    self.assertEqual(is_uri('notauri!'), False)
    self.assertEqual(is_uri('they://did/the/monster/mash'), True)
    self.assertEqual(is_s3_uri('s3://a/uri'), True)
    self.assertEqual(is_s3_uri('s3n://a/uri'), True)
    self.assertEqual(is_s3_uri('hdfs://a/uri'), False)
    self.assertEqual(parse_s3_uri('s3://bucket/loc'), ('bucket', 'loc'))

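# For reference, a minimal sketch of URI helpers consistent with the
# assertions above. Illustrative only -- the real helpers live elsewhere in
# the codebase (mrjob.parse) and may differ in detail, e.g. exactly which
# schemes count as S3 URIs.

import re

_URI_RE = re.compile(r'^[a-z][a-z0-9+\-.]*://', re.I)


def is_uri(path):
    """True if *path* looks like scheme://..., e.g. they://did/..."""
    return bool(_URI_RE.match(path))


def is_s3_uri(uri):
    """True for s3://, s3n://, and s3a:// URIs; False otherwise."""
    return uri.startswith(('s3://', 's3n://', 's3a://'))


def parse_s3_uri(uri):
    """Split an S3 URI into (bucket, key), e.g. ('bucket', 'loc')."""
    if not is_s3_uri(uri):
        raise ValueError('not an S3 URI: %r' % uri)
    bucket, _, key = uri.split('://', 1)[1].partition('/')
    return bucket, key
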
def _candidate_log_subdirs(fs, log_type, log_dir, node_log_path, ssh_host):
    """Yield lists of subdirectories to look for logs in.

    Currently, this means first SSH (if *ssh_host* is set), and then
    *log_dir* (if set).
    """
    # first, try SSH (most up-to-date)
    if ssh_host:
        yield _ssh_log_subdirs(
            fs, log_type, node_log_path=node_log_path, ssh_host=ssh_host)

    # then try the log directory
    if log_dir:
        if is_s3_uri(log_dir):
            relative_path = _S3_LOG_TYPE_TO_RELATIVE_PATH.get(log_type)
        else:
            relative_path = _LOG_TYPE_TO_RELATIVE_PATH.get(log_type)

        if relative_path is not None:
            yield [posixpath.join(log_dir, relative_path, "")]

def _candidate_log_subdirs(fs, log_type, log_dir, node_log_path, ssh_host):
    """Yield lists of subdirectories to look for logs in.

    Currently, this means first SSH (if *ssh_host* is set), and then
    *log_dir* (if set).
    """
    # first, try SSH (most up-to-date)
    if ssh_host:
        yield _ssh_log_subdirs(
            fs, log_type, node_log_path=node_log_path, ssh_host=ssh_host)

    # then try the log directory
    if log_dir:
        if is_s3_uri(log_dir):
            relative_path = _S3_LOG_TYPE_TO_RELATIVE_PATH.get(log_type)
        else:
            relative_path = _LOG_TYPE_TO_RELATIVE_PATH.get(log_type)

        if relative_path is not None:
            yield [join(log_dir, relative_path, '')]

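# Illustrative only: one way a caller might consume the candidate subdir
# lists yielded above, checking the most up-to-date source (SSH) first and
# falling back to the log directory. The _ls_logs() name and the fs.ls()
# interface are assumptions for the sake of the example.
def _ls_logs(fs, log_type, log_dir=None, node_log_path=None, ssh_host=None):
    for subdirs in _candidate_log_subdirs(
            fs, log_type, log_dir, node_log_path, ssh_host):
        uris = [uri for subdir in subdirs for uri in fs.ls(subdir)]
        if uris:
            # stop at the first source that actually has logs
            return uris
    return []
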
def simulate_progress(self, jobflow_id, now=None):
    """Simulate progress on the given job flow. This is automatically
    run when we call describe_jobflow().

    :type jobflow_id: str
    :param jobflow_id: fake job flow ID
    :type now: py:class:`datetime.datetime`
    :param now: alternate time to use as the current time (should be UTC)
    """
    if now is None:
        now = datetime.utcnow()

    if self.simulation_iterator:
        try:
            self.simulation_iterator.next()
        except StopIteration:
            raise AssertionError(
                'Simulated progress too many times; bailing out')

    job_flow = self.mock_emr_job_flows[jobflow_id]

    # if job is STARTING, move it along to WAITING
    if job_flow.state == 'STARTING':
        job_flow.state = 'WAITING'
        job_flow.startdatetime = to_iso8601(now)
        # instances are now provisioned and running
        for ig in job_flow.instancegroups:
            ig.instancerunningcount = ig.instancerequestcount

    # if job is done, don't advance it
    if job_flow.state in ('COMPLETED', 'TERMINATED', 'FAILED'):
        return

    # if SHUTTING_DOWN, finish shutting down
    if job_flow.state == 'SHUTTING_DOWN':
        if job_flow.reason == 'Shut down as step failed':
            job_flow.state = 'FAILED'
        else:
            job_flow.state = 'TERMINATED'
        job_flow.enddatetime = to_iso8601(now)
        return

    # if a step is currently running, advance it
    steps = getattr(job_flow, 'steps', None) or []
    for step_num, step in enumerate(steps):
        # skip steps that are already done
        if step.state in ('COMPLETED', 'FAILED', 'CANCELLED'):
            continue

        if step.name in ('Setup Hadoop Debugging',):
            step.state = 'COMPLETED'
            continue

        # allow steps to get stuck
        if getattr(step, 'mock_no_progress', None):
            return

        # found currently running step! going to handle it, then exit
        if step.state == 'PENDING':
            step.state = 'RUNNING'
            step.startdatetime = to_iso8601(now)
            return

        assert step.state == 'RUNNING'
        step.enddatetime = to_iso8601(now)

        # check if we're supposed to have an error
        if (jobflow_id, step_num) in self.mock_emr_failures:
            step.state = 'FAILED'
            reason = self.mock_emr_failures[(jobflow_id, step_num)]
            if reason:
                job_flow.reason = reason
            if step.actiononfailure == 'TERMINATE_JOB_FLOW':
                job_flow.state = 'SHUTTING_DOWN'
                if not reason:
                    job_flow.reason = 'Shut down as step failed'
            return

        step.state = 'COMPLETED'

        # create fake output if we're supposed to write to S3
        output_uri = self._get_step_output_uri(step)
        if output_uri and is_s3_uri(output_uri):
            mock_output = self.mock_emr_output.get(
                (jobflow_id, step_num)) or ['']
            bucket_name, key_name = parse_s3_uri(output_uri)

            # write output to S3
            for i, bytes in enumerate(mock_output):
                add_mock_s3_data(self.mock_s3_fs, {
                    bucket_name: {key_name + 'part-%05d' % i: bytes}})
        elif (jobflow_id, step_num) in self.mock_emr_output:
            raise AssertionError(
                "can't use output for job flow ID %s, step %d "
                "(it doesn't output to S3)" % (jobflow_id, step_num))

        # done!
        return

    # no pending steps. shut down job if appropriate
    if job_flow.keepjobflowalivewhennosteps == 'true':
        job_flow.state = 'WAITING'
        job_flow.reason = 'Waiting for steps to run'
    else:
        job_flow.state = 'COMPLETED'
        job_flow.reason = 'Steps Completed'

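# Illustrative only: how a step's fake output is laid out in mock S3 by the
# code above. Each chunk in mock_emr_output becomes one part-NNNNN key under
# the step's output URI. The URI and chunks below are hypothetical.
output_uri = 's3://walrus/output/'
chunks = ['line1\n', 'line2\n']

bucket_name, key_name = parse_s3_uri(output_uri)   # ('walrus', 'output/')
part_keys = [key_name + 'part-%05d' % i for i, _ in enumerate(chunks)]
# -> ['output/part-00000', 'output/part-00001'], written to bucket 'walrus'
#    via add_mock_s3_data(), one chunk per key
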
def can_handle_path(self, path):
    return is_s3_uri(path)

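# Illustrative only: can_handle_path() is typically consulted by a composite
# filesystem that routes each path to the first backend claiming it. The
# CompositeFS class below is an assumption for the example, not necessarily
# the project's actual composite filesystem.
class CompositeFS(object):
    def __init__(self, *filesystems):
        self.filesystems = filesystems

    def _fs_for(self, path):
        for fs in self.filesystems:
            if fs.can_handle_path(path):
                return fs
        raise IOError('no filesystem can handle %r' % path)

    def ls(self, path):
        return self._fs_for(path).ls(path)
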
def test_is_s3_uri(self):
    self.assertEqual(is_s3_uri('s3://a/uri'), True)
    self.assertEqual(is_s3_uri('s3n://a/uri'), True)
    self.assertEqual(is_s3_uri('s3a://a/uri'), True)
    self.assertEqual(is_s3_uri('hdfs://a/uri'), False)

def _simulate_progress(self, cluster_id, now=None):
    """Simulate progress on the given cluster. This is automatically
    run when we call :py:meth:`describe_step`, and, when the cluster is
    ``TERMINATING``, :py:meth:`describe_cluster`.

    :type cluster_id: str
    :param cluster_id: fake cluster ID
    :type now: py:class:`datetime.datetime`
    :param now: alternate time to use as the current time (should be UTC)
    """
    # TODO: this doesn't actually update steps to CANCELLED when
    # cluster is shut down
    if now is None:
        now = _boto3_now()

    cluster = self.mock_emr_clusters[cluster_id]

    # allow clusters to get stuck
    if cluster.get('_DelayProgressSimulation', 0) > 0:
        cluster['_DelayProgressSimulation'] -= 1
        return

    # this code is pretty loose about updating StateChangeReason
    # (for the cluster, instance groups, and steps). Add this as needed.

    # if job is STARTING, move it along to BOOTSTRAPPING
    if cluster['Status']['State'] == 'STARTING':
        cluster['Status']['State'] = 'BOOTSTRAPPING'
        # master now has a hostname
        cluster['MasterPublicDnsName'] = 'master.%s.mock' % cluster['Id']
        # instances are now provisioned
        for ig in cluster['_InstanceGroups']:
            ig['RunningInstanceCount'] = ig['RequestedInstanceCount']
            ig['Status']['State'] = 'BOOTSTRAPPING'
        return

    # if job is TERMINATING, move along to terminated
    if cluster['Status']['State'] == 'TERMINATING':
        code = cluster['Status']['StateChangeReason'].get('Code')
        if code and code.endswith('_FAILURE'):
            cluster['Status']['State'] = 'TERMINATED_WITH_ERRORS'
        else:
            cluster['Status']['State'] = 'TERMINATED'
        return

    # if job is done, nothing to do
    if cluster['Status']['State'] in ('TERMINATED',
                                      'TERMINATED_WITH_ERRORS'):
        return

    # if job is BOOTSTRAPPING, move it along to RUNNING and continue
    if cluster['Status']['State'] == 'BOOTSTRAPPING':
        cluster['Status']['State'] = 'RUNNING'
        for ig in cluster['_InstanceGroups']:
            ig['Status']['State'] = 'RUNNING'

    # at this point, should be RUNNING or WAITING
    assert cluster['Status']['State'] in ('RUNNING', 'WAITING')

    # simulate self-termination
    if cluster_id in self.mock_emr_self_termination:
        cluster['Status']['State'] = 'TERMINATING'
        cluster['Status']['StateChangeReason'] = dict(
            Code='INSTANCE_FAILURE',
            Message='The master node was terminated. ',  # sic
        )
        for step in cluster['_Steps']:
            if step['Status']['State'] in ('PENDING', 'RUNNING'):
                step['Status']['State'] = 'CANCELLED'  # not INTERRUPTED
        return

    # try to find the next step, and advance it
    for step_num, step in enumerate(cluster['_Steps']):
        # skip steps that are already done
        if step['Status']['State'] in (
                'COMPLETED', 'FAILED', 'CANCELLED', 'INTERRUPTED'):
            continue

        # found currently running step! handle it, then exit

        # start PENDING step
        if step['Status']['State'] == 'PENDING':
            step['Status']['State'] = 'RUNNING'
            step['Status']['Timeline']['StartDateTime'] = now
            return

        assert step['Status']['State'] == 'RUNNING'

        # check if we're supposed to have an error
        if (cluster_id, step_num) in self.mock_emr_failures:
            step['Status']['State'] = 'FAILED'

            if step['ActionOnFailure'] in ('TERMINATE_CLUSTER',
                                           'TERMINATE_JOB_FLOW'):
                cluster['Status']['State'] = 'TERMINATING'
                cluster['Status']['StateChangeReason']['Code'] = (
                    'STEP_FAILURE')
                cluster['Status']['StateChangeReason']['Message'] = (
                    'Shut down as step failed')

                for step in cluster['_Steps']:
                    if step['Status']['State'] in ('PENDING', 'RUNNING'):
                        step['Status']['State'] = 'CANCELLED'

            return

        # complete step
        step['Status']['State'] = 'COMPLETED'
        step['Status']['Timeline']['EndDateTime'] = now

        # create fake output if we're supposed to write to S3
        output_uri = self._get_step_output_uri(step['Config']['Args'])
        if output_uri and is_s3_uri(output_uri):
            mock_output = self.mock_emr_output.get(
                (cluster_id, step_num)) or [b'']

            bucket_name, key_name = parse_s3_uri(output_uri)

            # write output to S3
            for i, part in enumerate(mock_output):
                add_mock_s3_data(
                    self.mock_s3_fs,
                    {bucket_name: {key_name + 'part-%05d' % i: part}})
        elif (cluster_id, step_num) in self.mock_emr_output:
            raise AssertionError(
                "can't use output for cluster ID %s, step %d "
                "(it doesn't output to S3)" % (cluster_id, step_num))

        # done!
        # if this is the last step, continue to autotermination code, below
        if step_num < len(cluster['_Steps']) - 1:
            return

    # no pending steps. should we wait, or shut down?
    if cluster['AutoTerminate']:
        cluster['Status']['State'] = 'TERMINATING'
        cluster['Status']['StateChangeReason']['Code'] = (
            'ALL_STEPS_COMPLETED')
        cluster['Status']['StateChangeReason']['Message'] = (
            'Steps Completed')
    else:
        # just wait
        cluster['Status']['State'] = 'WAITING'
        cluster['Status']['StateChangeReason'] = {}

    return

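# For reference, the cluster state transitions the simulation above walks
# through on successive calls (summarized from the code above; not an
# exhaustive EMR state machine):
#
#   STARTING        -> BOOTSTRAPPING          (master hostname set, instances provision)
#   BOOTSTRAPPING   -> RUNNING                (then one step advances per call)
#   RUNNING/WAITING -> TERMINATING            (step failure, self-termination,
#                                              or all steps done with AutoTerminate)
#   RUNNING         -> WAITING                (all steps done, no AutoTerminate)
#   TERMINATING     -> TERMINATED             (or TERMINATED_WITH_ERRORS when the
#                                              StateChangeReason code ends in _FAILURE)
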
def simulate_progress(self, jobflow_id, now=None):
    """Simulate progress on the given job flow. This is automatically
    run when we call describe_jobflow().

    :type jobflow_id: str
    :param jobflow_id: fake job flow ID
    :type now: py:class:`datetime.datetime`
    :param now: alternate time to use as the current time (should be UTC)
    """
    if now is None:
        now = datetime.datetime.utcnow()

    if self.simulation_steps_left <= 0:
        raise AssertionError(
            "Simulated progress too many times; bailing out")
    self.simulation_steps_left -= 1

    job_flow = self.mock_emr_job_flows[jobflow_id]

    # if job is STARTING, move it along to WAITING
    if job_flow.state == "STARTING":
        job_flow.state = "WAITING"
        job_flow.startdatetime = to_iso8601(now)

    # if job is done, don't advance it
    if job_flow.state in ("COMPLETED", "TERMINATED", "FAILED"):
        return

    # if SHUTTING_DOWN, finish shutting down
    if job_flow.state == "SHUTTING_DOWN":
        if job_flow.reason == "Shut down as step failed":
            job_flow.state = "FAILED"
        else:
            job_flow.state = "TERMINATED"
        job_flow.enddatetime = to_iso8601(now)
        return

    # if a step is currently running, advance it
    for step_num, step in enumerate(job_flow.steps):
        # skip steps that are already done
        if step.state in ("COMPLETED", "FAILED", "CANCELLED"):
            continue

        if step.name in ("Setup Hadoop Debugging",):
            step.state = "COMPLETED"
            continue

        # found currently running step! going to handle it, then exit
        if step.state == "PENDING":
            step.state = "RUNNING"
            step.startdatetime = to_iso8601(now)
            return

        assert step.state == "RUNNING"
        step.enddatetime = to_iso8601(now)

        # check if we're supposed to have an error
        if (jobflow_id, step_num) in self.mock_emr_failures:
            step.state = "FAILED"
            reason = self.mock_emr_failures[(jobflow_id, step_num)]
            if reason:
                job_flow.reason = reason
            if step.actiononfailure == "TERMINATE_JOB_FLOW":
                job_flow.state = "SHUTTING_DOWN"
                if not reason:
                    job_flow.reason = "Shut down as step failed"
            return

        step.state = "COMPLETED"

        # create fake output if we're supposed to write to S3
        output_uri = self._get_step_output_uri(step)
        if output_uri and is_s3_uri(output_uri):
            mock_output = self.mock_emr_output.get(
                (jobflow_id, step_num)) or [""]
            bucket_name, key_name = parse_s3_uri(output_uri)

            # write output to S3
            for i, bytes in enumerate(mock_output):
                add_mock_s3_data(
                    self.mock_s3_fs,
                    {bucket_name: {key_name + "part-%05d" % i: bytes}})
        elif (jobflow_id, step_num) in self.mock_emr_output:
            raise AssertionError(
                "can't use output for job flow ID %s, step %d "
                "(it doesn't output to S3)" % (jobflow_id, step_num)
            )

        # done!
        return

    # no pending steps. shut down job if appropriate
    if job_flow.keepjobflowalivewhennosteps == "true":
        job_flow.state = "WAITING"
        job_flow.reason = "Waiting for steps to run"
    else:
        job_flow.state = "COMPLETED"
        job_flow.reason = "Steps Completed"