def aggregate_sql(raw, aggregation_type, aggregation_factor, periods):
    """Use SQL windows to calculate the average

    This should be quicker

    Args:
        raw: queryset of time series rows — presumably has 'ts' and 'value'
            fields, judging by the window expressions below; confirm at caller
        aggregation_type (str): key into the aggregations map below
            ("sum", "mean", "median", "min" or "max")
        aggregation_factor (int): number of buckets to split the queryset into
        periods: currently unused in this unfinished implementation

    Raises:
        NotImplementedError: always — the implementation below the raise is
            kept for reference but is not yet functional
        KeyError: if aggregation_type is not one of the supported keys

    Note:
        this doesn't work in sqlite

    Todo:
        This needs work - implementing a custom Expression or Func in
        django and using it instead of annotating the annotated QS, which
        doesn't work
    """
    # Map the caller-facing aggregation name to the callable implementing it.
    # 'median' is a plain function, the rest are Django Aggregate classes —
    # hence the isinstance/issubclass dispatch further down.
    aggregations = {
        "sum": Sum,
        "mean": Avg,
        "median": median,
        "min": Min,
        "max": Max,
    }

    # pylint: disable=unreachable

    # Resolved before the raise so an unknown aggregation_type still fails
    # loudly with a KeyError.
    aggregation_function = aggregations[aggregation_type]

    raise NotImplementedError("This is not yet finished")

    # --- Everything below is unreachable until the Todo above is resolved ---
    if isinstance(aggregation_function, type) and issubclass(aggregation_function, Aggregate):
        # A tile window expression that will split the queryset into an
        # even number of chunks
        tile_window = models.Window(
            expression=window.Ntile(num_buckets=aggregation_factor),
            order_by=models.F("ts").desc(),
        )

        # A window that will calculate the average of a window
        # partitioned by the 'ntile' attribute
        aggregate_window = models.Window(
            expression=aggregation_function("value"),
            partition_by=[models.F("ntile")],
        )

        # This creates a new queryset with all the original values in
        # it, but also an extra annotated value called 'ntile' which
        # corresponds to which tile it's been partitioned into
        tiled = raw.annotate(ntile=tile_window)

        # Then we do ANOTHER window function, which partitions based on
        # this tile number and does the actual annotation.
        data = tiled.annotate(agg=aggregate_window)

        logger.info(format_query_sql(data))
    else:
        # TODO
        # just call the numpy one
        pass
def list(self, request):
    """Return movies ranked (dense rank) by comment count in a date range.

    Query params are validated by TopMovieQueryParamsSerializer; on
    validation failure each offending parameter is mapped to a friendly
    message and returned with HTTP 400.
    """
    params = serializers.TopMovieQueryParamsSerializer(
        data=request.query_params)
    params.is_valid()

    if params.errors:
        # Translate each field's first error code into a user-facing message.
        response_data = {
            field: mappers.serializer_error_query_param_mapper.get(
                field_errors[0].code, 'Something is wrong.')
            for field, field_errors in params.errors.items()
        }
        return Response(response_data, status=status.HTTP_400_BAD_REQUEST)

    # Count only comments created inside the requested window.
    comment_count = django_models.Count(
        'comments',
        filter=Q(comments__created_at__range=[
            params.validated_data['date_from'],
            params.validated_data['date_to'],
        ]))

    # Dense rank: movies with equal comment counts share a rank.
    rank_window = django_models.Window(
        expression=window.DenseRank(),
        order_by=django_models.F("total_comments").desc())

    queryset = (
        models.Movie.objects
        .annotate(total_comments=comment_count)
        .annotate(rank=rank_window)
        .order_by('rank')
        .values('id', 'rank', 'total_comments'))

    output = serializers.TopMovieSerializer(queryset, many=True)
    return Response(output.data)
def get_queryset(self):
    """Movies annotated with their total comment count and a dense rank
    over that count, ordered best-ranked first. Only 'id' is loaded
    eagerly; annotations ride along.
    """
    rank_window = models.Window(
        expression=models.functions.DenseRank(),
        order_by=models.F("total_comments").desc(),
    )
    return (
        Movie.objects.all()
        .annotate(total_comments=models.Count('comments'), rank=rank_window)
        .only('id')
        .order_by('rank')
    )
def comments_ranking(self, from_date, to_date):
    """Annotate each row with its comment count inside a date range and a
    dense rank over that count.

    Args:
        from_date: inclusive lower bound matched against comments__created
        to_date: inclusive upper bound

    Returns:
        QuerySet annotated with 'total_comments' (int) and 'rank' (int,
        dense rank — ties share a rank), highest comment count ranked 1.
    """
    # Filtered aggregate (Django 2.0+, same version as the Window used
    # below) replaces the legacy Count(Case(When(..., then=1))) idiom:
    # identical result — only related rows matching the range are counted —
    # with clearer SQL (FILTER clause / CASE generated by the ORM).
    return self.annotate(
        total_comments=models.Count(
            'comments',
            filter=models.Q(comments__created__range=(from_date, to_date)),
        )
    ).annotate(
        rank=models.Window(
            expression=DenseRank(),
            order_by=models.F('total_comments').desc(),
        )
    )
def add_donation_number(apps, schema_editor):
    """Data migration: renumber each donor's completed donations 1..N in
    timestamp order, writing only the rows whose number actually changed.
    """
    from django.db.models.functions import RowNumber

    # Use the historical model state, as required inside migrations.
    Donation = apps.get_model('fds_donation', 'Donation')

    numbered = Donation.objects.filter(completed=True).annotate(
        new_number=models.Window(
            expression=RowNumber(),
            partition_by=[models.F('donor_id')],
            order_by=models.F('timestamp').asc(),
        ))

    for donation in numbered:
        if donation.number == donation.new_number:
            continue
        # Per-row update: window annotations can't be fed to a bulk update.
        Donation.objects.filter(id=donation.id).update(
            number=donation.new_number)
def save(self, *args, **kwargs):
    """Save, then renumber this donor's completed donations 1..N in
    timestamp order. Returns whatever the parent save() returns.
    """
    result = super().save(*args, **kwargs)

    # All completed donations for this donor, annotated with their
    # position in timestamp order (no partition needed — already filtered
    # to a single donor).
    numbered = Donation.objects.filter(
        donor_id=self.donor_id,
        completed=True,
    ).annotate(new_number=models.Window(
        expression=RowNumber(),
        order_by=models.F('timestamp').asc(),
    ))

    # Can't call .update() directly due to Django
    # ORM limitations, loop and update:
    for donation in numbered:
        if donation.number != donation.new_number:
            Donation.objects.filter(id=donation.id).update(
                number=donation.new_number)

    return result
def diff_vs_previous_order(self):
    """Annotate each order with 'prev_order_id': the id of the same
    customer's immediately preceding order by created_at (NULL for the
    customer's first order).
    """
    previous_id_window = models.Window(
        expression=Lag('id'),
        partition_by=[models.F('customer_id')],
        order_by=models.F('created_at').asc(),
    )
    return self.annotate(prev_order_id=previous_id_window)
def latest_ts_data_optimised(cls, devices):
    """Get latest ts data for multiple devices

    Args:
        devices (list): list of devices to get data for

    Returns:
        dict: mapping of device id -> {sensor name: latest TimeSeriesData
            reading} — both code paths key by device id first

    Todo:
        inner join on the sensors/devices will take nulls into account
        without raising a doesnotexist error
    """
    from zconnect.zc_timeseries.models import DeviceSensor, TimeSeriesData

    if "postgres" not in settings.DATABASES["default"]["ENGINE"]:
        # distinct(<field>) below is postgres-only, so fall back to one
        # query per sensor on other backends.
        logger.warning(
            "Using slower method of getting ts data when not using postgresql"
        )

        readings = {}
        for device in devices:
            device_readings = {}
            for sensor in device.sensors.all():
                data = sensor.get_latest_ts_data()
                logger.debug("Latest for %s: %s",
                             sensor.sensor_type.sensor_name, data)
                device_readings[sensor.sensor_type.sensor_name] = data
            readings[device.id] = device_readings

        return readings

    # Define a window that partitions the data by the sensor and annotates
    # each row with the 'first' value, based on the timestamp (eg, the most
    # recent)
    w_last = models.Window(
        expression=window.FirstValue("id"),
        partition_by=models.F("sensor"),
        order_by=models.F("ts").desc(),
    )

    # All sensors for all devices
    sensors = DeviceSensor.objects.filter(device__in=devices)

    # Use the window to get a set of the ids corresponding to the most
    # recent timestamps for each sensor
    ids = (
        TimeSeriesData.objects
        # Filter by sensor
        .filter(sensor__in=sensors)
        # Annotate rows with the most recent timestamp for that sensor
        .annotate(last_reading_id=w_last)
        # Needed for distinct() to work
        .order_by()
        # Only get unique last_reading_id. Don't care which one as we
        # extract this field anyway
        .distinct("last_reading_id")
        # Return only this annotated value
        .values_list("last_reading_id", flat=True))

    # Then just filter on the ids
    raw_data = (
        TimeSeriesData.objects.filter(pk__in=ids)
        # Prefetch here or else it generates a new query for each access
        # to .sensor or .sensor.sensor_type
        # This is 2 extra queries, but it's ALWAYS 2, not 2 for each device
        .prefetch_related("sensor__sensor_type"))

    # Then 'annotate' the names into a dictionary
    data = {device.id: {} for device in devices}

    for d in raw_data:
        device_id = d.sensor.device_id
        sensor_name = d.sensor.sensor_type.sensor_name
        data[device_id][sensor_name] = d

    logger.debug(data)

    return data