Exemplo n.º 1
0
def yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Lazily yield one described EMR cluster per listed cluster.

    Each yielded object is the result of ``describe_cluster()`` with two
    attributes attached: ``steps`` and ``bootstrapactions``, both lists.
    Because this is a generator, no EMR call is made until it is iterated.

    :param float max_days_ago: If set, clusters are listed with a
                               ``created_after`` cutoff of *now* minus
                               this many days; otherwise the cutoff is
                               ``None``.
    :param now: the :py:class:`datetime.datetime` the cutoff is measured
                from. Defaults to ``datetime.utcnow()``.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    reference_time = datetime.utcnow() if now is None else now

    conn = EMRJobRunner(**runner_kwargs).make_emr_conn()

    # --max-days-ago restricts the listing to recently created clusters
    if max_days_ago is None:
        cutoff = None
    else:
        cutoff = reference_time - timedelta(days=max_days_ago)

    for summary in _yield_all_clusters(conn, created_after=cutoff):
        cid = summary.id
        described = conn.describe_cluster(cid)

        # steps and bootstrap actions come from separate helper
        # generators; collect each into a list on the cluster
        step_list = list(_yield_all_steps(conn, cid))
        described.steps = step_list
        action_list = list(_yield_all_bootstrap_actions(conn, cid))
        described.bootstrapactions = action_list

        yield described
Exemplo n.º 2
0
def yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Generate described EMR clusters, with steps and bootstrap actions.

    For every summary produced by the cluster listing, the cluster is
    looked up with ``describe_cluster()``, then given a ``steps`` list and
    a ``bootstrapactions`` list before being yielded.

    :param float max_days_ago: If set, only list clusters created after
                               *now* minus this many days.
    :param now: the :py:class:`datetime.datetime` used as the reference
                point for *max_days_ago*. Defaults to
                ``datetime.utcnow()``.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    runner = EMRJobRunner(**runner_kwargs)
    emr = runner.make_emr_conn()

    # no --max-days-ago means no lower bound on creation time
    oldest_allowed = (
        now - timedelta(days=max_days_ago)
        if max_days_ago is not None else None)

    listing = _yield_all_clusters(emr, created_after=oldest_allowed)
    for entry in listing:
        entry_id = entry.id

        full_cluster = emr.describe_cluster(entry_id)
        full_cluster.steps = list(_yield_all_steps(emr, entry_id))
        full_cluster.bootstrapactions = list(
            _yield_all_bootstrap_actions(emr, entry_id))

        yield full_cluster
Exemplo n.º 3
0
    def test_terminate_job_flow(self):
        # set up a cluster, then point the tool's argv at the mock ID
        made_id = self.make_cluster(pool_emr_job_flows=True)
        self.monkey_patch_argv('--quiet', '--no-conf', 'j-MOCKCLUSTER0')

        terminate_main()

        # re-describe the cluster through a separate connection and check
        # the state it ended up in
        conn = EMRJobRunner(conf_paths=[]).make_emr_conn()
        final_state = conn.describe_cluster(made_id).status.state
        self.assertEqual(final_state, 'TERMINATED')
    def test_terminate_job_flow(self):
        # arrange: one cluster, and command-line args naming the mock ID
        target_id = self.make_cluster(pool_emr_job_flows=True)
        cli_args = ('--quiet', '--no-conf', 'j-MOCKCLUSTER0')
        self.monkey_patch_argv(*cli_args)

        # act
        terminate_main()

        # assert: the cluster made above now reports TERMINATED
        runner = EMRJobRunner(conf_paths=[])
        described = runner.make_emr_conn().describe_cluster(target_id)
        self.assertEqual(described.status.state, 'TERMINATED')
Exemplo n.º 5
0
def _yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Lazily yield one described EMR cluster per ``list_clusters`` result.

    Each yielded item is the ``'Cluster'`` entry of a
    ``describe_cluster()`` response, with two keys added: ``'Steps'``
    (the paginated ``list_steps`` results, in reversed order) and
    ``'BootstrapActions'`` (the paginated ``list_bootstrap_actions``
    results).

    :param float max_days_ago: If set, pass ``CreatedAfter`` (*now* minus
                               this many days) when listing clusters.
    :param now: the :py:class:`datetime.datetime` that *max_days_ago* is
                measured from. Defaults to whatever ``_boto3_now()``
                returns.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = _boto3_now()

    client = EMRJobRunner(**runner_kwargs).make_emr_client()

    # if --max-days-ago is set, only look at recent clusters
    cutoff = (
        None if max_days_ago is None
        else now - timedelta(days=max_days_ago))

    # pass _DELAY along so there is a pause after each API call
    # (see #1091). A connection wrapper could do this instead if the
    # need becomes more general.
    paginate_kwargs = {'_delay': _DELAY}
    if cutoff is not None:
        paginate_kwargs['CreatedAfter'] = cutoff

    summaries = _boto3_paginate(
        'Clusters', client, 'list_clusters', **paginate_kwargs)

    for summary in summaries:
        cid = summary['Id']

        response = client.describe_cluster(ClusterId=cid)
        described = response['Cluster']
        sleep(_DELAY)

        # gather every step, then flip the order in place
        step_list = list(_boto3_paginate(
            'Steps', client, 'list_steps',
            ClusterId=cid, _delay=_DELAY))
        step_list.reverse()
        described['Steps'] = step_list

        described['BootstrapActions'] = list(_boto3_paginate(
            'BootstrapActions', client, 'list_bootstrap_actions',
            ClusterId=cid, _delay=_DELAY))

        yield described
Exemplo n.º 6
0
def _yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Generate described EMR clusters with steps and bootstrap actions.

    Clusters are listed via ``list_clusters``; each one is then fetched
    with ``describe_cluster()`` and its ``'Cluster'`` entry is yielded
    after ``'Steps'`` (reversed ``list_steps`` results) and
    ``'BootstrapActions'`` have been filled in.

    :param float max_days_ago: If set, don't list clusters created longer
                               than this many days before *now*.
    :param now: reference :py:class:`datetime.datetime` for
                *max_days_ago*. Defaults to the return value of
                ``_boto3_now()``.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    start_from = _boto3_now() if now is None else now

    emr = EMRJobRunner(**runner_kwargs).make_emr_client()

    # --max-days-ago limits the listing to recently created clusters
    if max_days_ago is not None:
        oldest_allowed = start_from - timedelta(days=max_days_ago)
    else:
        oldest_allowed = None

    # every paginated call gets _delay=_DELAY so that API calls are
    # spaced out (see #1091)
    listing_opts = dict(_delay=_DELAY)
    if oldest_allowed is not None:
        listing_opts.update(CreatedAfter=oldest_allowed)

    for entry in _boto3_paginate(
            'Clusters', emr, 'list_clusters', **listing_opts):

        entry_id = entry['Id']
        per_cluster = dict(ClusterId=entry_id, _delay=_DELAY)

        info = emr.describe_cluster(ClusterId=entry_id)['Cluster']
        sleep(_DELAY)

        # collect all steps first, then store them in reversed order
        fetched_steps = list(
            _boto3_paginate('Steps', emr, 'list_steps', **per_cluster))
        info['Steps'] = fetched_steps[::-1]

        info['BootstrapActions'] = list(
            _boto3_paginate(
                'BootstrapActions', emr, 'list_bootstrap_actions',
                **per_cluster))

        yield info