def aggregate_sql(raw, aggregation_type, aggregation_factor, periods):
    """Use SQL windows to calculate the average

    This should be quicker

    Note:
        this doesn't work in sqlite

    Todo:
        This needs work - implementing a custom Expression or Func in django and
        using it instead of annotating the annotated QS, which doesn't work

    Args:
        raw: queryset of raw time series rows (presumably has 'ts' and 'value'
            fields, judging by the window expressions below — confirm)
        aggregation_type (str): one of "sum", "mean", "median", "min", "max"
        aggregation_factor (int): number of buckets to split the queryset into
        periods: unused in the current (unfinished) implementation

    Raises:
        NotImplementedError: always — the function is deliberately unfinished.
        KeyError: if aggregation_type is not a key of the mapping below.
    """
    # Map the external aggregation name onto either a Django Aggregate class
    # (Sum/Avg/Min/Max) or a plain callable (median).
    aggregations = {
        "sum": Sum,
        "mean": Avg,
        "median": median,
        "min": Min,
        "max": Max,
    }

    # Everything after the raise is intentionally dead code kept as a sketch
    # of the eventual implementation (see Todo above).
    # pylint: disable=unreachable

    aggregation_function = aggregations[aggregation_type]

    raise NotImplementedError("This is not yet finished")

    # Aggregate subclasses can be expressed as SQL window functions; anything
    # else (e.g. median) would have to fall back to the numpy path below.
    if isinstance(aggregation_function, type) and issubclass(aggregation_function, Aggregate):
        # A tile window expression that will split the queryset into an
        # even number of chunks
        tile_window = models.Window(
            expression=window.Ntile(num_buckets=aggregation_factor),
            order_by=models.F("ts").desc(),
        )

        # A window that will calculate the average of a window
        # partitioned by the 'ntile' attribute
        aggregate_window = models.Window(
            expression = aggregation_function("value"),
            partition_by=[models.F("ntile")],
        )

        # This creates a new queryset with all the original values in
        # it, but also an extra annotated value called 'ntile' which
        # corresponds to which tile it's been partitioned into
        tiled = raw.annotate(ntile=tile_window)

        # Then we do ANOTHER window function, which partitions based on
        # this tile number and does the actual annotation.
        data = tiled.annotate(agg=aggregate_window)

        logger.info(format_query_sql(data))
    else:
        # TODO
        # just call the numpy one
        pass
Exemplo n.º 2
0
    def list(self, request):
        """Return movies ranked (dense rank) by comment count in a date range.

        Query params are validated by TopMovieQueryParamsSerializer
        ('date_from' / 'date_to'). On validation failure, each field error
        code is mapped to a friendly message and returned as a 400.

        Returns:
            Response: list of {'id', 'rank', 'total_comments'} dicts,
            ordered by rank.
        """
        serializer = serializers.TopMovieQueryParamsSerializer(
            data=request.query_params)
        # Use is_valid()'s return value directly instead of calling it for
        # its side effect and then re-checking serializer.errors.
        if not serializer.is_valid():
            response_data = {
                err_key: mappers.serializer_error_query_param_mapper.get(
                    err_val[0].code, 'Something is wrong.')
                for err_key, err_val in serializer.errors.items()
            }
            return Response(response_data, status=status.HTTP_400_BAD_REQUEST)

        date_range = [
            serializer.validated_data['date_from'],
            serializer.validated_data['date_to'],
        ]
        # Count only comments created inside the requested range, then rank
        # movies with a dense-rank window over that count.
        queryset = (
            models.Movie.objects
            .annotate(total_comments=django_models.Count(
                'comments',
                filter=Q(comments__created_at__range=date_range)))
            .annotate(rank=django_models.Window(
                expression=window.DenseRank(),
                order_by=django_models.F("total_comments").desc()))
            .order_by('rank')
            .values('id', 'rank', 'total_comments'))
        serializer = serializers.TopMovieSerializer(queryset, many=True)
        return Response(serializer.data)
Exemplo n.º 3
0
Arquivo: views.py Projeto: Cosiek/mAPI
 def get_queryset(self):
     """Movies annotated with their comment count and dense rank on it,
     ordered best-ranked first; only 'id' is loaded from the movie table.
     """
     ranking = models.Window(
         expression=models.functions.DenseRank(),
         order_by=models.F("total_comments").desc(),
     )
     return (
         Movie.objects.all()
         .annotate(
             total_comments=models.Count('comments'),
             rank=ranking,
         )
         .only('id')
         .order_by('rank')
     )
Exemplo n.º 4
0
 def comments_ranking(self, from_date, to_date):
     """Annotate each row with 'total_comments' (comments created within
     [from_date, to_date]) and 'rank' (dense rank on that count, descending).
     """
     # Count 1 per comment whose 'created' falls in the range; others are NULL
     # and therefore excluded from Count.
     in_range_comment = models.Case(
         models.When(comments__created__range=(from_date, to_date), then=1),
         output_field=models.IntegerField(),
     )
     counted = self.annotate(total_comments=models.Count(in_range_comment))
     return counted.annotate(
         rank=models.Window(
             expression=DenseRank(),
             order_by=models.F('total_comments').desc(),
         )
     )
Exemplo n.º 5
0
def add_donation_number(apps, schema_editor):
    """Data migration: backfill per-donor donation numbers.

    Each donor's completed donations are numbered 1..n in ascending
    timestamp order via a RowNumber window; rows whose stored number
    already matches are left untouched.
    """
    from django.db.models.functions import RowNumber

    Donation = apps.get_model('fds_donation', 'Donation')

    numbering = models.Window(
        expression=RowNumber(),
        partition_by=[models.F('donor_id')],
        order_by=models.F('timestamp').asc(),
    )
    annotated = Donation.objects.filter(completed=True).annotate(
        new_number=numbering)
    # The ORM can't .update() straight from a window annotation, so write
    # back one row at a time.
    for donation in annotated:
        if donation.number != donation.new_number:
            Donation.objects.filter(id=donation.id).update(
                number=donation.new_number)
Exemplo n.º 6
0
    def save(self, *args, **kwargs):
        """Save normally, then renumber this donor's completed donations.

        Numbers are assigned 1..n by ascending timestamp using a RowNumber
        window; only rows whose stored number differs are written back.
        """
        result = super().save(*args, **kwargs)

        numbered = Donation.objects.filter(
            donor_id=self.donor_id,
            completed=True,
        ).annotate(new_number=models.Window(
            expression=RowNumber(),
            order_by=models.F('timestamp').asc(),
        ))
        # Can't call .update() directly due to Django
        # ORM limitations, loop and update:
        for donation in numbered:
            if donation.number != donation.new_number:
                Donation.objects.filter(id=donation.id).update(
                    number=donation.new_number)
        return result
Exemplo n.º 7
0
 def diff_vs_previous_order(self):
     """Annotate each order with 'prev_order_id': the id of the same
     customer's previous order by created_at (NULL for the first one).
     """
     previous_in_customer = models.Window(
         expression=Lag('id'),
         partition_by=[models.F('customer_id')],
         order_by=models.F('created_at').asc(),
     )
     return self.annotate(prev_order_id=previous_in_customer)
Exemplo n.º 8
0
    def latest_ts_data_optimised(cls, devices):
        """Get latest ts data for multiple devices

        Args:
            devices (list): list of devices to get data for

        Returns:
            dict: mapping of device id -> {sensor name: latest TimeSeriesData
            reading}

        Todo:
            inner join on the sensors/devices will take nulls into account
            without raising a doesnotexist error
        """
        from zconnect.zc_timeseries.models import DeviceSensor, TimeSeriesData

        engine = settings.DATABASES["default"]["ENGINE"]
        if "postgres" not in engine:
            logger.warning(
                "Using slower method of getting ts data when not using postgresql"
            )

            # Slow path: one query per sensor via get_latest_ts_data().
            readings = {}
            for device in devices:
                per_sensor = {}
                for sensor in device.sensors.all():
                    latest = sensor.get_latest_ts_data()
                    logger.debug("Latest for %s: %s",
                                 sensor.sensor_type.sensor_name, latest)
                    per_sensor[sensor.sensor_type.sensor_name] = latest
                readings[device.id] = per_sensor
            return readings

        # Fast path (postgres): a window that partitions rows by sensor,
        # orders newest-first, and tags each row with the id of the newest
        # row in its partition.
        newest_in_partition = models.Window(
            expression=window.FirstValue("id"),
            partition_by=models.F("sensor"),
            order_by=models.F("ts").desc(),
        )

        # Every sensor belonging to any of the requested devices
        device_sensors = DeviceSensor.objects.filter(device__in=devices)

        # Collect the ids of the most recent reading per sensor. order_by()
        # clears any default ordering so that distinct() works as intended;
        # any row per partition will do since we only read the annotation.
        latest_ids = (
            TimeSeriesData.objects
            .filter(sensor__in=device_sensors)
            .annotate(last_reading_id=newest_in_partition)
            .order_by()
            .distinct("last_reading_id")
            .values_list("last_reading_id", flat=True))

        # Fetch those rows; the prefetch keeps .sensor / .sensor.sensor_type
        # access to a constant 2 extra queries instead of 2 per device.
        latest_readings = (
            TimeSeriesData.objects.filter(pk__in=latest_ids)
            .prefetch_related("sensor__sensor_type"))

        # Re-shape into {device id: {sensor name: reading}}
        result = {device.id: {} for device in devices}
        for reading in latest_readings:
            owner_id = reading.sensor.device_id
            name = reading.sensor.sensor_type.sensor_name
            result[owner_id][name] = reading

        logger.debug(result)
        return result