def save(self, *args, **kwargs):
    """Insert the cluster into the database or update it if already
    present, spawning the cluster if it's not already spawned.
    """
    # A missing jobflow id means the EMR cluster hasn't been spawned yet.
    if self.jobflow_id is None:
        self.jobflow_id = self.provisioner.start(
            user_username=self.created_by.username,
            user_email=self.created_by.email,
            identifier=self.identifier,
            emr_release=self.emr_release.version,
            size=self.size,
            public_key=self.ssh_key.key,
        )
        # Defer the first status fetch until the jobflow id has actually
        # been committed to the database.
        transaction.on_commit(self.sync)

        Metric.record(
            'cluster-emr-version',
            data={'version': self.emr_release.version},
        )

    # Derive the expiration timestamp from the configured lifetime
    # (in hours) if it hasn't been set yet.
    if not self.expires_at:
        self.expires_at = timezone.now() + timedelta(hours=self.lifetime)

    super().save(*args, **kwargs)
    def save(self, *args, **kwargs):
        """Insert the cluster into the database or update it if already
        present, spawning the cluster if it's not already spawned.

        NOTE(review): this is a byte-identical duplicate of the ``save``
        definition earlier in this file (likely a scraping artifact);
        only one definition can survive in a real module.
        """
        # actually start the cluster
        # (a missing jobflow id means the EMR cluster wasn't spawned yet)
        if self.jobflow_id is None:
            self.jobflow_id = self.provisioner.start(
                user_username=self.created_by.username,
                user_email=self.created_by.email,
                identifier=self.identifier,
                emr_release=self.emr_release.version,
                size=self.size,
                public_key=self.ssh_key.key,
            )
            # once we've stored the jobflow id we can fetch the status for the first time
            transaction.on_commit(self.sync)

            # Track which EMR release versions are actually being used.
            Metric.record('cluster-emr-version',
                          data={'version': self.emr_release.version})

        # set the dates
        if not self.expires_at:
            # clusters should expire after the lifetime it's set to
            self.expires_at = timezone.now() + timedelta(hours=self.lifetime)

        super().save(*args, **kwargs)
    def extend(self, hours):
        """Extend the cluster lifetime by the given number of hours."""
        # F() expressions push the arithmetic into the database so the
        # update is applied to the stored values, not in-memory copies.
        extension = timedelta(hours=hours)
        self.expires_at = models.F('expires_at') + extension
        self.lifetime_extension_count = models.F('lifetime_extension_count') + 1
        self.save()

        payload = {
            'identifier': self.identifier,
            'size': self.size,
            'jobflow_id': self.jobflow_id,
        }
        Metric.record('cluster-extension', data=payload)
    def extend(self, hours):
        """Extend the cluster lifetime by the given number of hours.

        NOTE(review): near-duplicate of the ``extend`` definition earlier
        in this file (scraping artifact); only one can survive in a real
        module.
        """
        # F() expressions apply the arithmetic in the database rather
        # than on the in-memory values.
        self.expires_at = models.F('expires_at') + timedelta(hours=hours)
        self.lifetime_extension_count = models.F(
            'lifetime_extension_count') + 1
        self.save()

        Metric.record('cluster-extension',
                      data={
                          'identifier': self.identifier,
                          'size': self.size,
                          'jobflow_id': self.jobflow_id,
                      })
# Example 5 (scraped snippet separator; stray "0" is a page artifact)
def test_metrics_record(now, one_hour_ago):
    """Metric.record stores the key, value, timestamp and optional data."""
    Metric.record("metric-key-1")
    Metric.record("metric-key-2", 500)
    Metric.record("metric-key-3", data={"other-value": "test"})
    Metric.record("metric-key-4",
                  created_at=one_hour_ago,
                  data={"other-value-2": 100})

    # (key, expected value, expected created_at, expected data)
    expectations = [
        ("metric-key-1", 1, now, None),
        ("metric-key-2", 500, now, None),
        ("metric-key-3", 1, now, {"other-value": "test"}),
        ("metric-key-4", 1, one_hour_ago, {"other-value-2": 100}),
    ]
    for key, value, created_at, data in expectations:
        metric = Metric.objects.get(key=key)
        assert metric.value == value
        # Drop microseconds so the tiny delay between the fixture
        # timestamp and the actual insert doesn't fail the comparison.
        assert metric.created_at.replace(microsecond=0) == created_at
        if data is None:
            assert metric.data is None
        else:
            assert metric.data == data
# Example 6 (scraped snippet separator; stray "0" is a page artifact)
def test_metrics_record(now, one_hour_ago):
    """Metric.record stores the key, value, timestamp and optional data.

    NOTE(review): redefines ``test_metrics_record`` from earlier in this
    file (single-quoted variant of the same test, a scraping artifact);
    at import time only one definition survives.
    """
    Metric.record('metric-key-1')
    Metric.record('metric-key-2', 500)
    Metric.record('metric-key-3', data={'other-value': 'test'})
    Metric.record('metric-key-4', created_at=one_hour_ago,
                  data={'other-value-2': 100})

    # value defaults to 1 and data to None when not given
    m = Metric.objects.get(key='metric-key-1')
    assert m.value == 1
    # microseconds dropped so the insert delay doesn't break equality
    assert m.created_at.replace(microsecond=0) == now
    assert m.data is None

    m = Metric.objects.get(key='metric-key-2')
    assert m.value == 500
    assert m.created_at.replace(microsecond=0) == now
    assert m.data is None

    m = Metric.objects.get(key='metric-key-3')
    assert m.value == 1
    assert m.created_at.replace(microsecond=0) == now
    assert m.data == {'other-value': 'test'}

    # created_at can be supplied explicitly
    m = Metric.objects.get(key='metric-key-4')
    assert m.value == 1
    assert m.created_at.replace(microsecond=0) == one_hour_ago
    assert m.data == {'other-value-2': 100}
# Example 7 (scraped snippet separator; stray "0" is a page artifact)
    def extend(self, hours):
        """Extend the cluster lifetime by the given number of hours."""
        # F() expressions apply the arithmetic in the database so
        # concurrent extensions don't clobber each other.
        self.expires_at = models.F("expires_at") + timedelta(hours=hours)
        self.lifetime_extension_count = (
            models.F("lifetime_extension_count") + 1
        )
        # Re-arm the expiration notification for the new deadline.
        self.expiration_mail_sent = False
        self.save()

        extension_data = {
            "identifier": self.identifier,
            "size": self.size,
            "jobflow_id": self.jobflow_id,
        }
        with transaction.atomic():
            Metric.record("cluster-extension", data=extension_data)
    def sync(self, info=None):
        """Update model fields from the latest AWS cluster info.

        Should be called to update the latest cluster status in
        ``self.most_recent_status`` (plus the master address and the
        lifecycle date fields), saving the model and recording metrics
        when lifecycle dates change.

        :param info: Mapping of AWS API fields (``state``, ``public_dns``,
            ``creation_datetime``, ``ready_datetime``, ``end_datetime``);
            fetched from ``self.info`` when not given.
        """
        if info is None:
            info = self.info

        # Map AWS API fields to Cluster model fields.
        model_field_map = (
            ('state', 'most_recent_status'),
            ('public_dns', 'master_address'),
            ('creation_datetime', 'started_at'),
            ('ready_datetime', 'ready_at'),
            ('end_datetime', 'finished_at'),
        )
        save_needed = False
        date_fields_updated = False

        # set the various model fields to the value the API returned
        for api_field, model_field in model_field_map:
            field_value = info.get(api_field)
            # Skip fields the API did not return, and fields whose value
            # hasn't changed since the last sync.
            if field_value is None or field_value == getattr(
                    self, model_field):
                continue
            setattr(self, model_field, field_value)
            save_needed = True

            if model_field in ('started_at', 'ready_at', 'finished_at'):
                date_fields_updated = True

        if save_needed:
            self.save()

        if date_fields_updated:

            if self.finished_at:
                # When cluster is finished, record normalized instance hours.
                # total_seconds() is required here: timedelta.seconds only
                # holds the sub-day component (0-86399) and silently drops
                # whole days for clusters that ran longer than 24 hours.
                hours = math.ceil(
                    (self.finished_at - self.started_at).total_seconds()
                    / 60 / 60)
                normalized_hours = hours * self.size
                Metric.record('cluster-normalized-instance-hours',
                              normalized_hours,
                              data={
                                  'identifier': self.identifier,
                                  'size': self.size,
                                  'jobflow_id': self.jobflow_id,
                              })

            # When cluster is ready, record a count and time to ready.
            if self.ready_at and not self.finished_at:
                # A simple count to track number of clusters spun up
                # successfully.
                Metric.record('cluster-ready',
                              data={
                                  'identifier': self.identifier,
                                  'size': self.size,
                                  'jobflow_id': self.jobflow_id,
                              })
                # Time in seconds it took the cluster to be ready;
                # total_seconds() so waits over a day aren't truncated,
                # int() to keep the previously recorded integer type.
                time_to_ready = int(
                    (self.ready_at - self.started_at).total_seconds())
                Metric.record('cluster-time-to-ready',
                              time_to_ready,
                              data={
                                  'identifier': self.identifier,
                                  'size': self.size,
                                  'jobflow_id': self.jobflow_id,
                              })
    def sync(self, info=None):
        """Update model fields from the latest AWS cluster info.

        Should be called to update the latest cluster status in
        ``self.most_recent_status`` (plus the master address and the
        lifecycle date fields), saving the model and recording metrics
        when lifecycle dates change.

        :param info: Mapping of AWS API fields (``state``, ``public_dns``,
            ``creation_datetime``, ``ready_datetime``, ``end_datetime``);
            fetched from ``self.info`` when not given.
        """
        if info is None:
            info = self.info

        # Map AWS API fields to Cluster model fields.
        model_field_map = (
            ('state', 'most_recent_status'),
            ('public_dns', 'master_address'),
            ('creation_datetime', 'started_at'),
            ('ready_datetime', 'ready_at'),
            ('end_datetime', 'finished_at'),
        )
        save_needed = False
        date_fields_updated = False

        # set the various model fields to the value the API returned
        for api_field, model_field in model_field_map:
            field_value = info.get(api_field)
            # Skip fields the API did not return, and fields whose value
            # hasn't changed since the last sync.
            if field_value is None or field_value == getattr(self, model_field):
                continue
            setattr(self, model_field, field_value)
            save_needed = True

            if model_field in ('started_at', 'ready_at', 'finished_at'):
                date_fields_updated = True

        if save_needed:
            self.save()

        if date_fields_updated:

            if self.finished_at:
                # When cluster is finished, record normalized instance hours.
                # total_seconds() is required here: timedelta.seconds only
                # holds the sub-day component (0-86399) and silently drops
                # whole days for clusters that ran longer than 24 hours.
                hours = math.ceil(
                    (self.finished_at - self.started_at).total_seconds() / 60 / 60
                )
                normalized_hours = hours * self.size
                Metric.record(
                    'cluster-normalized-instance-hours', normalized_hours,
                    data={
                        'identifier': self.identifier,
                        'size': self.size,
                        'jobflow_id': self.jobflow_id,
                    }
                )

            # When cluster is ready, record a count and time to ready.
            if self.ready_at and not self.finished_at:
                # A simple count to track number of clusters spun up
                # successfully.
                Metric.record('cluster-ready', data={
                    'identifier': self.identifier,
                    'size': self.size,
                    'jobflow_id': self.jobflow_id,
                })
                # Time in seconds it took the cluster to be ready;
                # total_seconds() so waits over a day aren't truncated,
                # int() to keep the previously recorded integer type.
                time_to_ready = int(
                    (self.ready_at - self.started_at).total_seconds())
                Metric.record(
                    'cluster-time-to-ready', time_to_ready,
                    data={
                        'identifier': self.identifier,
                        'size': self.size,
                        'jobflow_id': self.jobflow_id,
                    }
                )