def yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Yield one described cluster per cluster listed by EMR.

    Each yielded object is the result of ``describe_cluster()`` with two
    extra attributes attached: ``steps`` (a list of the cluster's steps)
    and ``bootstrapactions`` (a list of its bootstrap actions).

    :param float max_days_ago: If set, don't fetch job flows created longer
                               than this many days ago.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    emr_conn = EMRJobRunner(**runner_kwargs).make_emr_conn()

    # with no --max-days-ago there is no cutoff; otherwise only look at
    # clusters created after (now - max_days_ago)
    if max_days_ago is None:
        created_after = None
    else:
        created_after = now - timedelta(days=max_days_ago)

    summaries = _yield_all_clusters(emr_conn, created_after=created_after)

    for summary in summaries:
        cid = summary.id

        # the summary only gives us the ID; fetch the full description and
        # hang the steps and bootstrap actions off of it
        described = emr_conn.describe_cluster(cid)
        described.steps = list(_yield_all_steps(emr_conn, cid))
        described.bootstrapactions = list(
            _yield_all_bootstrap_actions(emr_conn, cid))

        yield described
def yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Get relevant job flow information from EMR, one cluster at a time.

    This is a generator: for every cluster summary produced by
    ``_yield_all_clusters()``, the cluster is described and then annotated
    with ``steps`` and ``bootstrapactions`` lists before being yielded.

    :param float max_days_ago: If set, don't fetch job flows created longer
                               than this many days ago.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    now = datetime.utcnow() if now is None else now

    runner = EMRJobRunner(**runner_kwargs)
    emr_conn = runner.make_emr_conn()

    # if --max-days-ago is set, only look at recent jobs
    created_after = (
        None if max_days_ago is None
        else now - timedelta(days=max_days_ago))

    for cluster_summary in _yield_all_clusters(
            emr_conn, created_after=created_after):
        cluster_id = cluster_summary.id
        cluster = emr_conn.describe_cluster(cluster_id)

        # materialize both listings so callers get plain lists
        step_list = [
            step for step in _yield_all_steps(emr_conn, cluster_id)]
        cluster.steps = step_list

        action_list = [
            action for action in
            _yield_all_bootstrap_actions(emr_conn, cluster_id)]
        cluster.bootstrapactions = action_list

        yield cluster
def test_terminate_job_flow(self):
    """Run ``terminate_main()`` against ``j-MOCKCLUSTER0`` and check that
    the cluster made for this test ends up ``TERMINATED``."""
    mock_cluster_id = self.make_cluster(pool_emr_job_flows=True)

    # patch argv so terminate_main() sees these arguments
    self.monkey_patch_argv('--quiet', '--no-conf', 'j-MOCKCLUSTER0')
    terminate_main()

    # look the cluster up again through a fresh connection
    conn = EMRJobRunner(conf_paths=[]).make_emr_conn()
    final_state = conn.describe_cluster(mock_cluster_id).status.state

    self.assertEqual(final_state, 'TERMINATED')
def _yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Yield a description dict for each cluster listed by EMR.

    Each yielded dict is the ``'Cluster'`` entry of a ``describe_cluster()``
    response, with two keys added: ``'Steps'`` (the paginated ``list_steps``
    results, in reversed order) and ``'BootstrapActions'`` (the paginated
    ``list_bootstrap_actions`` results).

    :param float max_days_ago: If set, don't fetch clusters created longer
                               than this many days ago.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = _boto3_now()

    emr_client = EMRJobRunner(**runner_kwargs).make_emr_client()

    # use _DELAY to pause after each API call (see #1091). Could
    # implement some sort of connection wrapper for this if it becomes more
    # generally useful.
    paginate_kwargs = {'_delay': _DELAY}

    # if --max-days-ago is set, only look at recent jobs
    if max_days_ago is not None:
        paginate_kwargs['CreatedAfter'] = now - timedelta(days=max_days_ago)

    summaries = _boto3_paginate(
        'Clusters', emr_client, 'list_clusters', **paginate_kwargs)

    for summary in summaries:
        cid = summary['Id']

        response = emr_client.describe_cluster(ClusterId=cid)
        info = response['Cluster']
        # describe_cluster() isn't paginated, so delay by hand
        sleep(_DELAY)

        # NOTE(review): presumably list_steps returns newest-first and the
        # reversal restores chronological order -- confirm against the API
        steps = list(_boto3_paginate(
            'Steps', emr_client, 'list_steps',
            ClusterId=cid, _delay=_DELAY))
        steps.reverse()
        info['Steps'] = steps

        bootstrap_actions = list(_boto3_paginate(
            'BootstrapActions', emr_client, 'list_bootstrap_actions',
            ClusterId=cid, _delay=_DELAY))
        info['BootstrapActions'] = bootstrap_actions

        yield info
def _yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Get relevant cluster information from EMR, one cluster at a time.

    This is a generator. For every summary returned by paginating
    ``list_clusters``, the cluster is described and the resulting
    ``'Cluster'`` dict is extended with ``'Steps'`` (reversed
    ``list_steps`` results) and ``'BootstrapActions'`` before being yielded.

    :param float max_days_ago: If set, don't fetch clusters created longer
                               than this many days ago.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    now = _boto3_now() if now is None else now

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # if --max-days-ago is set, only look at recent jobs
    if max_days_ago is None:
        created_after = None
    else:
        created_after = now - timedelta(days=max_days_ago)

    # every paginated call passes _delay=_DELAY so that we pause after each
    # API call (see #1091)
    list_clusters_kwargs = dict(_delay=_DELAY)
    if created_after is not None:
        list_clusters_kwargs.update(CreatedAfter=created_after)

    for cluster_summary in _boto3_paginate(
            'Clusters', emr_client, 'list_clusters',
            **list_clusters_kwargs):
        cluster_id = cluster_summary['Id']
        per_cluster_kwargs = dict(ClusterId=cluster_id, _delay=_DELAY)

        cluster = emr_client.describe_cluster(
            ClusterId=cluster_id)['Cluster']
        # not a paginated call, so pause explicitly
        sleep(_DELAY)

        # store the steps in the opposite order from how they were listed
        listed_steps = list(_boto3_paginate(
            'Steps', emr_client, 'list_steps', **per_cluster_kwargs))
        cluster['Steps'] = listed_steps[::-1]

        cluster['BootstrapActions'] = [
            action for action in _boto3_paginate(
                'BootstrapActions', emr_client, 'list_bootstrap_actions',
                **per_cluster_kwargs)]

        yield cluster