Example #1
class SSHKey(CreatedByModel, EditedAtModel, URLActionModel):
    """
    A Django data model to store public SSH keys for logged-in users
    to be used in the :mod:`on-demand clusters <atmo.clusters>`.
    """
    #: The list of valid SSH key data prefixes, which are validated
    #: on save.
    VALID_PREFIXES = [
        'ssh-rsa',
        'ssh-dss',
        'ecdsa-sha2-nistp256',
        'ecdsa-sha2-nistp384',
        'ecdsa-sha2-nistp521',
    ]

    title = models.CharField(
        max_length=100,
        help_text='Name to give to this public key',
    )
    key = models.TextField(
        help_text='Should start with one of the following prefixes: %s' %
                  ', '.join(VALID_PREFIXES),
    )
    fingerprint = models.CharField(
        max_length=48,
        blank=True,
    )

    class Meta:
        permissions = [
            ('view_sshkey', 'Can view SSH key'),
        ]
        unique_together = (
            ('created_by', 'fingerprint')
        )

    __str__ = autostr('{self.title}')

    __repr__ = autorepr(['title', 'fingerprint'])

    url_prefix = 'keys'
    url_actions = ['detail', 'delete', 'raw']

    def get_absolute_url(self):
        return self.urls.detail

    @property
    def prefix(self):
        """
        The prefix of the key data,
        one of the :data:`~atmo.keys.models.SSHKey.VALID_PREFIXES`.
        """
        return self.key.strip().split()[0]

    def save(self, *args, **kwargs):
        self.fingerprint = calculate_fingerprint(self.key)
        super().save(*args, **kwargs)
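
A minimal usage sketch of the model above (the `user` object, the key material, and
`calculate_fingerprint` are assumed from the surrounding atmo code; values are made up):

key = SSHKey(
    created_by=user,       # from CreatedByModel (not shown here)
    title="laptop key",
    key="ssh-rsa AAAAB3NzaC1yc2E... user@laptop",
)
# The first token of the key data is exposed as `prefix` and can be
# checked against VALID_PREFIXES before saving.
assert key.prefix in SSHKey.VALID_PREFIXES
key.save()  # save() fills in key.fingerprint via calculate_fingerprint()
print(key.fingerprint)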
Example #2
class SSHKey(CreatedByModel, EditedAtModel, URLActionModel):
    """
    A Django data model to store public SSH keys for logged-in users
    to be used in the :mod:`on-demand clusters <atmo.clusters>`.
    """

    #: The list of valid SSH key data prefixes, which are validated
    #: on save.
    VALID_PREFIXES = [
        "ssh-rsa",
        "ssh-dss",
        "ecdsa-sha2-nistp256",
        "ecdsa-sha2-nistp384",
        "ecdsa-sha2-nistp521",
    ]

    title = models.CharField(
        max_length=100, help_text="Name to give to this public key"
    )
    key = models.TextField(
        help_text="Should start with one of the following prefixes: %s"
        % ", ".join(VALID_PREFIXES)
    )
    fingerprint = models.CharField(max_length=48, blank=True)

    class Meta:
        permissions = [("view_sshkey", "Can view SSH key")]
        unique_together = ("created_by", "fingerprint")

    __str__ = autostr("{self.title}")

    __repr__ = autorepr(["title", "fingerprint"])

    url_prefix = "keys"
    url_actions = ["detail", "delete", "raw"]

    def get_absolute_url(self):
        return self.urls.detail

    @property
    def prefix(self):
        """
        The prefix of the key data,
        one of the :data:`~atmo.keys.models.SSHKey.VALID_PREFIXES`.
        """
        return self.key.strip().split()[0]

    def save(self, *args, **kwargs):
        self.fingerprint = calculate_fingerprint(self.key)
        super().save(*args, **kwargs)
Example #3
class SparkJobRunAlert(EditedAtModel):
    """
    A data model to store job run alerts for later processing by an
    async job that sends out emails.
    """
    run = models.ForeignKey(
        SparkJobRun,
        on_delete=models.CASCADE,
        related_name='alerts',
    )
    reason_code = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text="The reason code for the creation of the alert.",
    )
    reason_message = models.TextField(
        default='',
        help_text="The reason message for the creation of the alert.",
    )
    mail_sent_date = models.DateTimeField(
        blank=True,
        null=True,
        help_text="The datetime the alert email was sent.",
    )

    class Meta:
        unique_together = [
            ['run', 'reason_code', 'reason_message'],
        ]
        index_together = [
            ['reason_code', 'mail_sent_date'],
        ]

    __str__ = autostr('{self.id}')

    def short_reason_message(self):
        return self.reason_message[:50]

    __repr__ = autorepr(
        ['id', 'reason_code', 'short_reason_message'],
        short_reason_message=short_reason_message,
    )
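
A sketch of how such an alert is typically created from a job run; this mirrors
SparkJobRun.alert() shown further below (the `run` instance and the reason values
are hypothetical):

alert, created = run.alerts.get_or_create(
    reason_code="STEP_FAILURE",
    reason_message="Step exited with a non-zero status.",
)
# repr(alert) includes the id, reason_code and the truncated
# short_reason_message, courtesy of autorepr above.
print(repr(alert))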
Example #4
class EMRRelease(EditedAtModel):
    version = models.CharField(
        max_length=50,
        primary_key=True,
    )
    changelog_url = models.TextField(
        help_text='The URL of the changelog with details about the release.',
        default='',
    )
    help_text = models.TextField(
        help_text=
        'Optional help text to show for users when creating a cluster.',
        default='',
    )
    is_active = models.BooleanField(
        help_text='Whether this version should be shown to the user at all.',
        default=True,
    )
    is_experimental = models.BooleanField(
        help_text=
        'Whether this version should be shown to users as experimental.',
        default=False,
    )
    is_deprecated = models.BooleanField(
        help_text=
        'Whether this version should be shown to users as deprecated.',
        default=False,
    )

    objects = EMRReleaseQuerySet.as_manager()

    class Meta:
        ordering = ['-version']
        get_latest_by = 'created_at'
        verbose_name = 'EMR release'
        verbose_name_plural = 'EMR releases'

    __str__ = autostr('{self.version}')

    __repr__ = autorepr(
        ['version', 'is_active', 'is_experimental', 'is_deprecated'])
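
An illustrative sketch of working with the releases (version strings are made up;
`created_at` is assumed to be provided by EditedAtModel, which is not shown here):

EMRRelease.objects.create(version="5.11.0")
EMRRelease.objects.create(version="5.13.0", is_experimental=True)

latest = EMRRelease.objects.latest()     # uses Meta.get_latest_by = 'created_at'
newest_first = EMRRelease.objects.all()  # ordered by Meta.ordering = ['-version']
print(str(latest))                       # autostr renders the version string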
Example #5
class EMRRelease(EditedAtModel):
    version = models.CharField(max_length=50, primary_key=True)
    changelog_url = models.TextField(
        help_text="The URL of the changelog with details about the release.",
        default="")
    help_text = models.TextField(
        help_text=
        "Optional help text to show for users when creating a cluster.",
        default="",
    )
    is_active = models.BooleanField(
        help_text="Whether this version should be shown to the user at all.",
        default=True,
    )
    is_experimental = models.BooleanField(
        help_text=
        "Whether this version should be shown to users as experimental.",
        default=False,
    )
    is_deprecated = models.BooleanField(
        help_text=
        "Whether this version should be shown to users as deprecated.",
        default=False,
    )

    objects = EMRReleaseQuerySet.as_manager()

    class Meta:
        ordering = ["-version"]
        get_latest_by = "created_at"
        verbose_name = "EMR release"
        verbose_name_plural = "EMR releases"

    __str__ = autostr("{self.version}")

    __repr__ = autorepr(
        ["version", "is_active", "is_experimental", "is_deprecated"])
class SparkJobRun(EditedAtModel):
    """
    A data model to store information about every individual run of a
    scheduled Spark job.

    This denormalizes some values from its related data model
    :class:`SparkJob`.
    """

    spark_job = models.ForeignKey(
        SparkJob,
        on_delete=models.CASCADE,
        related_name="runs",
        related_query_name="runs",
    )
    jobflow_id = models.CharField(max_length=50, blank=True, null=True)
    emr_release_version = models.CharField(max_length=50,
                                           blank=True,
                                           null=True)
    size = models.IntegerField(
        help_text="Number of computers used to run the job.",
        blank=True,
        null=True)
    status = models.CharField(max_length=50,
                              blank=True,
                              default=DEFAULT_STATUS,
                              db_index=True)
    scheduled_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was scheduled.")
    started_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was started on AWS EMR.",
    )
    ready_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text=
        "Date/time when the cluster was ready to run steps on AWS EMR.",
    )
    finished_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was terminated or failed.",
    )

    objects = SparkJobRunQuerySet.as_manager()

    class Meta:
        get_latest_by = "created_at"
        ordering = ["-created_at"]

    __str__ = autostr("{self.jobflow_id}")

    def spark_job_identifier(self):
        return self.spark_job.identifier

    __repr__ = autorepr(
        ["jobflow_id", "spark_job_identifier", "emr_release_version", "size"],
        spark_job_identifier=spark_job_identifier,
    )

    @property
    def info(self):
        return self.spark_job.cluster_provisioner.info(self.jobflow_id)

    def sync(self, info=None):
        """
        Updates latest status and life cycle datetimes.
        """
        if info is None:
            info = self.info
        # a mapping between what the provisioner returns and what the data model uses
        model_field_map = (
            ("state", "status"),
            ("creation_datetime", "started_at"),
            ("ready_datetime", "ready_at"),
            ("end_datetime", "finished_at"),
        )
        save_needed = False
        date_fields_updated = False

        # set the various model fields to the value the API returned
        for api_field, model_field in model_field_map:
            field_value = info.get(api_field)
            if field_value is None or field_value == getattr(
                    self, model_field):
                continue
            setattr(self, model_field, field_value)
            save_needed = True

            if model_field in ("started_at", "ready_at", "finished_at"):
                date_fields_updated = True

        with transaction.atomic():
            # If the job cluster terminated with errors, raise the alarm.
            if self.status == Cluster.STATUS_TERMINATED_WITH_ERRORS:
                transaction.on_commit(lambda: self.alert(info))

            # If any data changed, save it.
            if save_needed:
                self.save()

        with transaction.atomic():
            if date_fields_updated:
                # When job cluster is ready, record time to ready.
                if self.ready_at and not self.finished_at:
                    # Time in seconds it took the cluster to be ready.
                    time_to_ready = (self.ready_at - self.started_at).seconds
                    Metric.record(
                        "sparkjob-time-to-ready",
                        time_to_ready,
                        data={
                            "identifier": self.spark_job.identifier,
                            "size": self.size,
                            "jobflow_id": self.jobflow_id,
                        },
                    )

                if self.finished_at:
                    # When job is finished, record normalized instance hours.
                    hours = math.ceil(
                        (self.finished_at - self.started_at).seconds / 60 / 60)
                    normalized_hours = hours * self.size
                    Metric.record(
                        "sparkjob-normalized-instance-hours",
                        normalized_hours,
                        data={
                            "identifier": self.spark_job.identifier,
                            "size": self.size,
                            "jobflow_id": self.jobflow_id,
                        },
                    )

                if self.finished_at and self.ready_at:
                    # When job is finished, record time in seconds it took the
                    # scheduled job to run. Sometimes `ready_at` won't be
                    # available if the cluster terminated with errors.
                    run_time = (self.finished_at - self.ready_at).seconds
                    Metric.record(
                        "sparkjob-run-time",
                        run_time,
                        data={
                            "identifier": self.spark_job.identifier,
                            "size": self.size,
                            "jobflow_id": self.jobflow_id,
                        },
                    )

        return self.status

    def alert(self, info):
        self.alerts.get_or_create(
            reason_code=info["state_change_reason_code"],
            reason_message=info["state_change_reason_message"],
        )
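
A sketch of driving sync() with provisioner data; the info dict mirrors the keys
used in model_field_map above, while the `run` instance and the datetime values
are hypothetical:

info = {
    "state": Cluster.STATUS_TERMINATED,
    "creation_datetime": started_at,
    "ready_datetime": ready_at,
    "end_datetime": finished_at,
}
# Fields are only written when the API value is set and differs from the
# current one; metrics are recorded once the run is ready or finished.
status = run.sync(info)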
class SparkJob(EMRReleaseModel, CreatedByModel, EditedAtModel, URLActionModel):
    """
    A data model to store details about a scheduled Spark job, to be
    run on AWS EMR.
    """

    INTERVAL_DAILY = 24
    INTERVAL_WEEKLY = INTERVAL_DAILY * 7
    INTERVAL_MONTHLY = INTERVAL_DAILY * 30
    INTERVAL_CHOICES = [
        (INTERVAL_DAILY, "Daily"),
        (INTERVAL_WEEKLY, "Weekly"),
        (INTERVAL_MONTHLY, "Monthly"),
    ]
    RESULT_PRIVATE = "private"
    RESULT_PUBLIC = "public"
    RESULT_VISIBILITY_CHOICES = [(RESULT_PRIVATE, "Private"),
                                 (RESULT_PUBLIC, "Public")]
    identifier = models.CharField(
        max_length=100,
        help_text="Job name, used to uniqely identify individual jobs.",
        unique=True,
        db_index=True,
    )
    description = models.TextField(help_text="Job description.", default="")
    notebook_s3_key = models.CharField(
        max_length=800,
        help_text=
        "S3 key of the notebook after uploading it to the Spark code bucket.",
    )
    result_visibility = models.CharField(  # can currently be "public" or "private"
        max_length=50,
        help_text=
        "Whether notebook results are uploaded to a public or private bucket",
        choices=RESULT_VISIBILITY_CHOICES,
        default=RESULT_PRIVATE,
    )
    size = models.IntegerField(
        help_text="Number of computers to use to run the job.")
    interval_in_hours = models.IntegerField(
        help_text="Interval at which the job should run, in hours.",
        choices=INTERVAL_CHOICES,
        default=INTERVAL_DAILY,
    )
    job_timeout = models.IntegerField(
        help_text="Number of hours before the job times out.")
    start_date = models.DateTimeField(
        help_text="Date/time that the job should start being scheduled to run."
    )
    end_date = models.DateTimeField(
        blank=True,
        null=True,
        help_text=
        "Date/time that the job should stop being scheduled to run, null if no end date.",
    )
    expired_date = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was expired.",
        db_index=True,
    )
    is_enabled = models.BooleanField(
        default=True, help_text="Whether the job should run or not.")

    objects = SparkJobQuerySet.as_manager()

    class Meta:
        permissions = [("view_sparkjob", "Can view Spark job")]

    __str__ = autostr("{self.identifier}")

    __repr__ = autorepr(["identifier", "size", "is_enabled"])

    url_prefix = "jobs"
    url_actions = ["delete", "detail", "download", "edit", "run", "zeppelin"]

    def get_absolute_url(self):
        return self.urls.detail

    @property
    def provisioner(self):
        return SparkJobProvisioner()

    # TEMPORARY till we have 1:1 relationship to cluster object
    # and we can then ask for spark_job.cluster.provisioner
    @property
    def cluster_provisioner(self):
        return ClusterProvisioner()

    @property
    def schedule(self):
        from .schedules import SparkJobSchedule

        return SparkJobSchedule(self)

    def has_future_end_date(self, now):
        # no end date means it'll always be due
        if self.end_date is None:
            return True
        return self.end_date >= now

    @property
    def has_never_run(self):
        """
        Whether the job has never run before.
        Looks at both the cluster status and our own record when
        we asked it to run.
        """
        return (self.latest_run is None
                or self.latest_run.status == DEFAULT_STATUS
                or self.latest_run.scheduled_at is None)

    @property
    def has_finished(self):
        """Whether the job's cluster is terminated or failed"""
        return self.latest_run and self.latest_run.status in Cluster.FINAL_STATUS_LIST

    @property
    def has_timed_out(self):
        """
        Whether the current job run has been running longer than the
        job's timeout allows.
        """
        if self.has_never_run:
            # Job isn't even running at the moment and never ran before
            return False
        timeout_delta = timedelta(hours=self.job_timeout)
        max_run_time = self.latest_run.scheduled_at + timeout_delta
        timed_out = timezone.now() >= max_run_time
        return not self.is_runnable and timed_out

    @property
    def is_due(self):
        """
        Whether the start date is in the past and the end date is in the
        future.
        """
        now = timezone.now()
        has_past_start_date = self.start_date <= now
        return has_past_start_date and self.has_future_end_date(now)

    @property
    def is_runnable(self):
        """
        Either the job has never run before or its latest run has finished.

        This is checked right before the actual provisioning.
        """
        return self.has_never_run or self.has_finished

    @property
    def should_run(self):
        """Whether the scheduled Spark job should run."""
        return self.is_runnable and self.is_enabled and self.is_due

    @property
    def is_public(self):
        return self.result_visibility == self.RESULT_PUBLIC

    @property
    def is_active(self):
        return self.latest_run and self.latest_run.status in Cluster.ACTIVE_STATUS_LIST

    @property
    def notebook_name(self):
        return self.notebook_s3_key.rsplit("/", 1)[-1]

    @cached_property
    def notebook_s3_object(self):
        return self.provisioner.get(self.notebook_s3_key)

    @cached_property
    def results(self):
        return self.provisioner.results(self.identifier, self.is_public)

    def get_latest_run(self):
        try:
            return self.runs.latest()
        except SparkJobRun.DoesNotExist:
            return None

    latest_run = cached_property(get_latest_run, name="latest_run")

    def run(self):
        """Actually run the scheduled Spark job."""
        # if the job ran before and is still running, don't start it again
        if not self.is_runnable:
            return
        jobflow_id = self.provisioner.run(
            user_username=self.created_by.username,
            user_email=self.created_by.email,
            identifier=self.identifier,
            emr_release=self.emr_release.version,
            size=self.size,
            notebook_key=self.notebook_s3_key,
            is_public=self.is_public,
            job_timeout=self.job_timeout,
        )
        # Create new job history record.
        run = self.runs.create(
            spark_job=self,
            jobflow_id=jobflow_id,
            scheduled_at=timezone.now(),
            emr_release_version=self.emr_release.version,
            size=self.size,
        )
        # Remove the cached latest run so this object will requery it.
        try:
            delattr(self, "latest_run")
        except AttributeError:  # pragma: no cover
            pass  # It didn't have a `latest_run` and that's ok.

        with transaction.atomic():
            Metric.record("sparkjob-emr-version",
                          data={"version": self.emr_release.version})

        # sync with EMR API
        transaction.on_commit(run.sync)

    def expire(self):
        # TODO disable the job as well once it's easy to re-enable the job
        deleted = self.schedule.delete()
        self.expired_date = timezone.now()
        self.save()
        return deleted

    def terminate(self):
        """Stop the currently running scheduled Spark job."""
        if self.latest_run:
            self.cluster_provisioner.stop(self.latest_run.jobflow_id)

    def first_run(self):
        if self.latest_run:
            return None
        from .tasks import run_job

        return run_job.apply_async(
            args=(self.pk, ),
            kwargs={"first_run": True},
            # make sure we run this task only when we expect it
            # may be in the future, may be in the past
            # but definitely at a specific time
            eta=self.start_date,
        )

    def save(self, *args, **kwargs):
        # whether the job is being created for the first time
        first_save = self.pk is None
        # resetting expired_date in case a user resets the end_date
        if self.expired_date and self.end_date and self.end_date > timezone.now(
        ):
            self.expired_date = None
        super().save(*args, **kwargs)
        # Remove the cached latest run so this object will requery it.
        try:
            delattr(self, "latest_run")
        except AttributeError:  # pragma: no cover
            pass  # It didn't have a `latest_run` and that's ok.
        # first remove if it exists
        self.schedule.delete()
        # and then add it, but only if the end date is in the future
        if self.has_future_end_date(timezone.now()):
            self.schedule.add()
        if first_save:
            transaction.on_commit(self.first_run)

    def delete(self, *args, **kwargs):
        # make sure to shut down the cluster if it's currently running
        self.terminate()
        # make sure to clean up the job notebook from storage
        self.provisioner.remove(self.notebook_s3_key)
        self.schedule.delete()
        super().delete(*args, **kwargs)
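
A sketch of the scheduling check a periodic task might perform (hypothetical
driver code; the real scheduler lives elsewhere in atmo):

for job in SparkJob.objects.filter(is_enabled=True):
    if job.should_run:        # is_runnable and is_enabled and is_due
        job.run()             # provisions an EMR jobflow and creates a SparkJobRun
    elif job.has_timed_out:
        job.terminate()       # stop the cluster behind the latest run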
Example #8
def test_with_function_as_input():
    f = autounicode(autostr("{self.name} {foo}", foo=lambda x: 42))
    assert_equal(f(ascii), "Alex 42")
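
For context, a minimal sketch of the pattern this test exercises, assuming the
autorepr package (which provides autostr, autounicode and autorepr); the class
and values below are made up:

from autorepr import autorepr, autostr

class Person:
    def __init__(self, name):
        self.name = name

    # Keyword arguments given as callables are invoked with the instance.
    __str__ = autostr("{self.name} {greeting}", greeting=lambda self: "hi")
    __repr__ = autorepr(["name"])

print(str(Person("Alex")))   # -> "Alex hi"
print(repr(Person("Alex")))  # -> something like <Person name='Alex' ...>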
Example #9
class Cluster(EMRReleaseModel, CreatedByModel, EditedAtModel):
    STATUS_STARTING = 'STARTING'
    STATUS_BOOTSTRAPPING = 'BOOTSTRAPPING'
    STATUS_RUNNING = 'RUNNING'
    STATUS_WAITING = 'WAITING'
    STATUS_TERMINATING = 'TERMINATING'
    STATUS_TERMINATED = 'TERMINATED'
    STATUS_TERMINATED_WITH_ERRORS = 'TERMINATED_WITH_ERRORS'

    ACTIVE_STATUS_LIST = (
        STATUS_STARTING,
        STATUS_BOOTSTRAPPING,
        STATUS_RUNNING,
        STATUS_WAITING,
        STATUS_TERMINATING,
    )
    READY_STATUS_LIST = [
        STATUS_RUNNING,
        STATUS_WAITING,
    ]
    TERMINATED_STATUS_LIST = (STATUS_TERMINATED, )
    FAILED_STATUS_LIST = (STATUS_TERMINATED_WITH_ERRORS, )
    FINAL_STATUS_LIST = TERMINATED_STATUS_LIST + FAILED_STATUS_LIST

    STATE_CHANGE_REASON_INTERNAL_ERROR = 'INTERNAL_ERROR'
    STATE_CHANGE_REASON_VALIDATION_ERROR = 'VALIDATION_ERROR'
    STATE_CHANGE_REASON_INSTANCE_FAILURE = 'INSTANCE_FAILURE'
    STATE_CHANGE_REASON_BOOTSTRAP_FAILURE = 'BOOTSTRAP_FAILURE'
    STATE_CHANGE_REASON_USER_REQUEST = 'USER_REQUEST'
    STATE_CHANGE_REASON_STEP_FAILURE = 'STEP_FAILURE'
    STATE_CHANGE_REASON_ALL_STEPS_COMPLETED = 'ALL_STEPS_COMPLETED'
    FAILED_STATE_CHANGE_REASON_LIST = [
        STATE_CHANGE_REASON_INTERNAL_ERROR,
        STATE_CHANGE_REASON_VALIDATION_ERROR,
        STATE_CHANGE_REASON_INSTANCE_FAILURE,
        STATE_CHANGE_REASON_BOOTSTRAP_FAILURE,
        STATE_CHANGE_REASON_STEP_FAILURE,
    ]
    REQUESTED_STATE_CHANGE_REASON_LIST = [
        STATE_CHANGE_REASON_USER_REQUEST,
    ]
    COMPLETED_STATE_CHANGE_REASON_LIST = [
        STATE_CHANGE_REASON_ALL_STEPS_COMPLETED,
    ]
    DEFAULT_SIZE = 1
    DEFAULT_LIFETIME = 8

    identifier = models.CharField(
        max_length=100,
        help_text=
        "Cluster name, used to non-uniqely identify individual clusters.")
    size = models.IntegerField(
        help_text="Number of computers used in the cluster.")
    lifetime = models.PositiveSmallIntegerField(
        help_text=
        "Lifetime of the cluster after which it's automatically terminated, in hours.",
        default=DEFAULT_LIFETIME,
    )
    lifetime_extension_count = models.PositiveSmallIntegerField(
        help_text="Number of lifetime extensions.",
        default=0,
    )
    ssh_key = models.ForeignKey(
        'keys.SSHKey',
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        related_name='launched_clusters',  # e.g. ssh_key.launched_clusters.all()
        help_text="SSH key to use when launching the cluster.",
    )
    expires_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text=
        "Date/time that the cluster will expire and automatically be deleted.",
    )
    started_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was started on AWS EMR.",
    )
    ready_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text=
        "Date/time when the cluster was ready to run steps on AWS EMR.",
    )
    finished_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text=
        "Date/time when the cluster was terminated or failed on AWS EMR.",
    )
    jobflow_id = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text=
        "AWS cluster/jobflow ID for the cluster, used for cluster management.",
    )
    most_recent_status = models.CharField(
        max_length=50,
        default='',
        blank=True,
        help_text="Most recently retrieved AWS status for the cluster.",
        db_index=True,
    )
    master_address = models.CharField(
        max_length=255,
        default='',
        blank=True,
        help_text=("Public address of the master node."
                   "This is only available once the cluster has bootstrapped"),
    )
    expiration_mail_sent = models.BooleanField(
        default=False,
        help_text="Whether the expiration mail were sent.",
    )

    objects = ClusterQuerySet.as_manager()

    class Meta:
        permissions = [
            ('view_cluster', 'Can view cluster'),
        ]

    #: A cluster URL helper.
    class urls(urlman.Urls):
        def detail(self):
            return reverse('clusters-detail', kwargs={'id': self.id})

        def extend(self):
            return reverse('clusters-extend', kwargs={'id': self.id})

        def terminate(self):
            return reverse('clusters-terminate', kwargs={'id': self.id})

    __str__ = autostr('{self.identifier}')

    __repr__ = autorepr([
        'identifier',
        'most_recent_status',
        'size',
        'lifetime',
        'expires_at',
        'lifetime_extension_count',
    ])

    def get_absolute_url(self):
        return self.urls.detail

    @property
    def is_active(self):
        """Returns whether the cluster is active or not."""
        return self.most_recent_status in self.ACTIVE_STATUS_LIST

    @property
    def is_terminated(self):
        """Returns whether the cluster is terminated or not."""
        return self.most_recent_status in self.TERMINATED_STATUS_LIST

    @property
    def is_failed(self):
        """Returns whether the cluster has failed or not."""
        return self.most_recent_status in self.FAILED_STATUS_LIST

    @property
    def is_terminating(self):
        """Returns whether the cluster is terminating or not."""
        return self.most_recent_status == self.STATUS_TERMINATING

    @property
    def is_ready(self):
        """Returns whether the cluster is ready or not."""
        return self.most_recent_status == self.STATUS_WAITING

    @property
    def is_expiring_soon(self):
        """Returns whether the cluster is expiring in the next hour."""
        return self.expires_at <= timezone.now() + timedelta(hours=1)

    @property
    def provisioner(self):
        return ClusterProvisioner()

    @property
    def info(self):
        """Returns the provisioning information for the cluster."""
        return self.provisioner.info(self.jobflow_id)

    def sync(self, info=None):
        """Should be called to update latest cluster status in `self.most_recent_status`."""
        if info is None:
            info = self.info

        # Map AWS API fields to Cluster model fields.
        model_field_map = (
            ('state', 'most_recent_status'),
            ('public_dns', 'master_address'),
            ('creation_datetime', 'started_at'),
            ('ready_datetime', 'ready_at'),
            ('end_datetime', 'finished_at'),
        )
        save_needed = False
        date_fields_updated = False

        # set the various model fields to the value the API returned
        for api_field, model_field in model_field_map:
            field_value = info.get(api_field)
            # Only update the field if the API returned a value and it
            # differs from the current one.
            if field_value is None or field_value == getattr(
                    self, model_field):
                continue
            setattr(self, model_field, field_value)
            save_needed = True

            if model_field in ('started_at', 'ready_at', 'finished_at'):
                date_fields_updated = True

        if save_needed:
            self.save()

        if date_fields_updated:

            if self.finished_at:
                # When cluster is finished, record normalized instance hours.
                hours = math.ceil(
                    (self.finished_at - self.started_at).seconds / 60 / 60)
                normalized_hours = hours * self.size
                Metric.record('cluster-normalized-instance-hours',
                              normalized_hours,
                              data={
                                  'identifier': self.identifier,
                                  'size': self.size,
                                  'jobflow_id': self.jobflow_id,
                              })

            # When cluster is ready, record a count and time to ready.
            if self.ready_at and not self.finished_at:
                # A simple count to track number of clusters spun up
                # successfully.
                Metric.record('cluster-ready',
                              data={
                                  'identifier': self.identifier,
                                  'size': self.size,
                                  'jobflow_id': self.jobflow_id,
                              })
                # Time in seconds it took the cluster to be ready.
                time_to_ready = (self.ready_at - self.started_at).seconds
                Metric.record('cluster-time-to-ready',
                              time_to_ready,
                              data={
                                  'identifier': self.identifier,
                                  'size': self.size,
                                  'jobflow_id': self.jobflow_id,
                              })

    def save(self, *args, **kwargs):
        """Insert the cluster into the database or update it if already
        present, spawning the cluster if it's not already spawned.
        """
        # actually start the cluster
        if self.jobflow_id is None:
            self.jobflow_id = self.provisioner.start(
                user_username=self.created_by.username,
                user_email=self.created_by.email,
                identifier=self.identifier,
                emr_release=self.emr_release.version,
                size=self.size,
                public_key=self.ssh_key.key,
            )
            # once we've stored the jobflow id we can fetch the status for the first time
            transaction.on_commit(self.sync)

            Metric.record('cluster-emr-version',
                          data={'version': self.emr_release.version})

        # set the dates
        if not self.expires_at:
            # clusters should expire after the lifetime they're set to
            self.expires_at = timezone.now() + timedelta(hours=self.lifetime)

        super().save(*args, **kwargs)

    def extend(self, hours):
        """Extend the cluster lifetime by the given number of hours."""
        self.expires_at = models.F('expires_at') + timedelta(hours=hours)
        self.lifetime_extension_count = models.F(
            'lifetime_extension_count') + 1
        self.save()

        Metric.record('cluster-extension',
                      data={
                          'identifier': self.identifier,
                          'size': self.size,
                          'jobflow_id': self.jobflow_id,
                      })

    def deactivate(self):
        """Shutdown the cluster and update its status accordingly"""
        self.provisioner.stop(self.jobflow_id)
        self.sync()
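
A sketch of extending a running cluster's lifetime (the `cluster` instance is
hypothetical and assumed to have `expires_at` set):

if cluster.is_expiring_soon:
    cluster.extend(hours=2)
    # extend() assigns F() expressions, so reload to read the new values.
    cluster.refresh_from_db()
    print(cluster.expires_at, cluster.lifetime_extension_count)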
Example #10
class SparkJobRun(EditedAtModel):
    """
    A data model to store information about every individual run of a
    scheduled Spark job.

    This denormalizes some values from its related data model
    :class:`SparkJob`.
    """
    spark_job = models.ForeignKey(
        SparkJob,
        on_delete=models.CASCADE,
        related_name='runs',
        related_query_name='runs',
    )
    jobflow_id = models.CharField(
        max_length=50,
        blank=True,
        null=True,
    )
    emr_release_version = models.CharField(
        max_length=50,
        blank=True,
        null=True,
    )
    size = models.IntegerField(
        help_text="Number of computers used to run the job.",
        blank=True,
        null=True,
    )
    status = models.CharField(
        max_length=50,
        blank=True,
        default=DEFAULT_STATUS,
        db_index=True,
    )
    scheduled_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was scheduled.",
    )
    started_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was started on AWS EMR.")
    ready_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text=
        "Date/time when the cluster was ready to run steps on AWS EMR.")
    finished_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was terminated or failed.",
    )

    objects = SparkJobRunQuerySet.as_manager()

    class Meta:
        get_latest_by = 'created_at'
        ordering = ['-created_at']

    __str__ = autostr('{self.jobflow_id}')

    def spark_job_identifier(self):
        return self.spark_job.identifier

    __repr__ = autorepr(
        ['jobflow_id', 'spark_job_identifier', 'emr_release_version', 'size'],
        spark_job_identifier=spark_job_identifier,
    )

    @property
    def info(self):
        return self.spark_job.cluster_provisioner.info(self.jobflow_id)

    def sync(self, info=None):
        """
        Updates latest status and life cycle datetimes.
        """
        if info is None:
            info = self.info
        # a mapping between what the provisioner returns and what the data model uses
        model_field_map = (
            ('state', 'status'),
            ('creation_datetime', 'started_at'),
            ('ready_datetime', 'ready_at'),
            ('end_datetime', 'finished_at'),
        )
        # set the various model fields to the value the API returned
        for api_field, model_field in model_field_map:
            field_value = info.get(api_field)
            if field_value is None:
                continue
            setattr(self, model_field, field_value)

        # if the job cluster terminated with errors, raise the alarm
        if self.status == Cluster.STATUS_TERMINATED_WITH_ERRORS:
            transaction.on_commit(lambda: self.alert(info))
        self.save()
        return self.status

    def alert(self, info):
        self.alerts.get_or_create(
            reason_code=info['state_change_reason_code'],
            reason_message=info['state_change_reason_message'],
        )