def test_bootstrap_action_isnt_named_master(self):
    # args look like a pooled cluster's, but the action's name
    # is something other than 'master'
    bootstrap_actions = [
        {
            'Args': ['pool-0123456789abcdef0123456789abcdef', 'reflecting'],
            'Name': 'apprentice',
        },
    ]

    result = _legacy_pool_hash_and_name(bootstrap_actions)

    self.assertEqual(result, (None, None))
def test_too_many_args(self):
    # a 'master' action with three args
    bootstrap_actions = [
        {
            'Args': ['cowsay', '-b', 'mrjob'],
            'Name': 'master',
        },
    ]

    result = _legacy_pool_hash_and_name(bootstrap_actions)

    self.assertEqual(result, (None, None))
def test_too_few_args(self):
    # a 'master' action with only the pool-hash arg and no pool name
    bootstrap_actions = [
        {
            'Args': ['pool-0123456789abcdef0123456789abcdef'],
            'Name': 'master',
        },
    ]

    result = _legacy_pool_hash_and_name(bootstrap_actions)

    self.assertEqual(result, (None, None))
def test_first_arg_doesnt_start_with_pool(self):
    # a 'master' action with two args, the first lacking the 'pool-' prefix
    bootstrap_actions = [
        {
            'Args': ['cowsay', 'mrjob'],
            'Name': 'master',
        },
    ]

    result = _legacy_pool_hash_and_name(bootstrap_actions)

    self.assertEqual(result, (None, None))
def test_pooled_cluster(self):
    # a single 'master' action carrying 'pool-<hash>' and the pool name
    bootstrap_actions = [
        {
            'Args': ['pool-0123456789abcdef0123456789abcdef', 'reflecting'],
            'Name': 'master',
        },
    ]
    expected = ('0123456789abcdef0123456789abcdef', 'reflecting')

    result = _legacy_pool_hash_and_name(bootstrap_actions)

    self.assertEqual(result, expected)
def test_pooled_cluster_with_other_bootstrap_actions(self):
    # two unrelated actions come before the 'master' action
    bootstrap_actions = [
        {'Args': [], 'Name': 'action 0'},
        {'Args': [], 'Name': 'action 1'},
        {
            'Args': ['pool-0123456789abcdef0123456789abcdef', 'reflecting'],
            'Name': 'master',
        },
    ]
    expected = ('0123456789abcdef0123456789abcdef', 'reflecting')

    result = _legacy_pool_hash_and_name(bootstrap_actions)

    self.assertEqual(result, expected)
def test_pooled_cluster_with_max_mins_idle(self):
    # the max-mins-idle script gets added AFTER the master bootstrap
    # script, which was a problem back when only the last action
    # was looked at
    bootstrap_actions = [
        {
            'Args': ['pool-0123456789abcdef0123456789abcdef', 'reflecting'],
            'Name': 'master',
        },
        {
            'Args': ['900', '300'],
            'Name': 'idle timeout',
        },
    ]
    expected = ('0123456789abcdef0123456789abcdef', 'reflecting')

    result = _legacy_pool_hash_and_name(bootstrap_actions)

    self.assertEqual(result, expected)
def test_empty(self):
    # no bootstrap actions at all
    result = _legacy_pool_hash_and_name([])

    self.assertEqual(result, (None, None))
def _cluster_to_basic_summary(cluster, now=None):
    """Extract fields such as creation time, owner, etc. from the cluster.

    :param cluster: a :py:mod:`boto3` cluster data structure (a dict).
                    It must have a ``Steps`` key; ``BootstrapActions`` is
                    also read when ``_pool_hash_and_name()`` finds no pool
                    name.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.

    Returns a dictionary with the following keys. These will be ``None`` if
    the corresponding field in the cluster is unavailable.

    * *created*: UTC `datetime.datetime` that the cluster was created,
      or ``None``
    * *end*: UTC `datetime.datetime` that the cluster finished, or ``None``
    * *id*: cluster ID, or ``None`` (this should never happen)
    * *label*: The label for the cluster (usually the module name of the
      :py:class:`~mrjob.job.MRJob` script that started it), or
      ``None`` for non-:py:mod:`mrjob` clusters.
    * *name*: cluster name, or ``None`` (this should never happen)
    * *nih*: number of normalized instance hours used by the cluster
      (``0`` if the cluster doesn't report it).
    * *num_steps*: Number of steps in the cluster.
    * *owner*: The owner for the cluster (usually the user that started it),
      or ``None`` for non-:py:mod:`mrjob` clusters.
    * *pool*: pool name (e.g. ``'default'``) if the cluster is pooled,
      otherwise ``None``.
    * *ran*: How long the cluster ran, or has been running, as a
      :py:class:`datetime.timedelta`. This will be ``timedelta(0)`` if
      the cluster has no creation time.
    * *ready*: UTC `datetime.datetime` that the cluster finished
      bootstrapping, or ``None``
    * *state*: The cluster's state as a string (e.g. ``'RUNNING'``)
    """
    if now is None:
        now = _boto3_now()

    bcs = {}  # basic cluster summary to fill in

    # honor the documented contract: a missing field becomes None
    # rather than raising KeyError
    bcs['id'] = cluster.get('Id')
    bcs['name'] = cluster.get('Name')

    status = cluster.get('Status', {})
    timeline = status.get('Timeline', {})

    bcs['created'] = timeline.get('CreationDateTime')
    bcs['ready'] = timeline.get('ReadyDateTime')
    bcs['end'] = timeline.get('EndDateTime')

    if bcs['created']:
        # clusters that haven't ended yet are measured up to *now*
        bcs['ran'] = (bcs['end'] or now) - bcs['created']
    else:
        bcs['ran'] = timedelta(0)

    bcs['state'] = status.get('State')

    bcs['num_steps'] = len(cluster['Steps'])

    # prefer the current pooling scheme; only fall back to inspecting
    # bootstrap actions if it yields no pool name
    _, bcs['pool'] = _pool_hash_and_name(cluster)
    if not bcs['pool']:
        _, bcs['pool'] = _legacy_pool_hash_and_name(
            cluster['BootstrapActions'])

    # label and owner are parsed out of the cluster name
    m = _JOB_KEY_RE.match(bcs['name'] or '')
    if m:
        bcs['label'], bcs['owner'] = m.group(1), m.group(2)
    else:
        bcs['label'], bcs['owner'] = None, None

    bcs['nih'] = cluster.get('NormalizedInstanceHours', 0)

    return bcs
def _cluster_to_basic_summary(cluster, now=None):
    """Extract fields such as creation time, owner, etc. from the cluster.

    :param cluster: a :py:mod:`boto3` cluster data structure (a dict).
                    It must have a ``Steps`` key; ``BootstrapActions`` is
                    also read when ``_pool_hash_and_name()`` finds no pool
                    name.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.

    Returns a dictionary with the following keys. These will be ``None`` if
    the corresponding field in the cluster is unavailable.

    * *created*: UTC `datetime.datetime` that the cluster was created,
      or ``None``
    * *end*: UTC `datetime.datetime` that the cluster finished, or ``None``
    * *id*: cluster ID, or ``None`` (this should never happen)
    * *label*: The label for the cluster (usually the module name of the
      :py:class:`~mrjob.job.MRJob` script that started it), or
      ``None`` for non-:py:mod:`mrjob` clusters.
    * *name*: cluster name, or ``None`` (this should never happen)
    * *nih*: the cluster's ``NormalizedInstanceHours`` value, as a
      ``float`` (``0.0`` if the cluster doesn't report it).
    * *num_steps*: Number of steps in the cluster.
    * *owner*: The owner for the cluster (usually the user that started it),
      or ``None`` for non-:py:mod:`mrjob` clusters.
    * *pool*: pool name (e.g. ``'default'``) if the cluster is pooled,
      otherwise ``None``.
    * *ran*: How long the cluster ran, or has been running, as a
      :py:class:`datetime.timedelta`. This will be ``timedelta(0)`` if
      the cluster has no creation time.
    * *ready*: UTC `datetime.datetime` that the cluster finished
      bootstrapping, or ``None``
    * *state*: The cluster's state as a string (e.g. ``'RUNNING'``)
    """
    if now is None:
        now = _boto3_now()

    bcs = {}  # basic cluster summary to fill in

    # honor the documented contract: a missing field becomes None
    # rather than raising KeyError
    bcs['id'] = cluster.get('Id')
    bcs['name'] = cluster.get('Name')

    status = cluster.get('Status', {})
    timeline = status.get('Timeline', {})

    bcs['created'] = timeline.get('CreationDateTime')
    bcs['ready'] = timeline.get('ReadyDateTime')
    bcs['end'] = timeline.get('EndDateTime')

    if bcs['created']:
        # clusters that haven't ended yet are measured up to *now*
        bcs['ran'] = (bcs['end'] or now) - bcs['created']
    else:
        bcs['ran'] = timedelta(0)

    bcs['state'] = status.get('State')

    bcs['num_steps'] = len(cluster['Steps'])

    # prefer the current pooling scheme; only fall back to inspecting
    # bootstrap actions if it yields no pool name
    _, bcs['pool'] = _pool_hash_and_name(cluster)
    if not bcs['pool']:
        _, bcs['pool'] = _legacy_pool_hash_and_name(
            cluster['BootstrapActions'])

    # label and owner are parsed out of the cluster name
    m = _JOB_KEY_RE.match(bcs['name'] or '')
    if m:
        bcs['label'], bcs['owner'] = m.group(1), m.group(2)
    else:
        bcs['label'], bcs['owner'] = None, None

    # always a float, so downstream arithmetic doesn't mix int and float
    bcs['nih'] = float(cluster.get('NormalizedInstanceHours', 0))

    return bcs