def median_entity_salary(self):
    """Return the median salary ``amount`` tied to ``self.object``.

    A salary is included when the employer reached through
    ``job -> position -> employer`` is ``self.object`` itself or has
    ``self.object`` as its ``parent``.

    Returns:
        The value of the 0.5 ``Percentile`` aggregate over ``amount``,
        requested with a ``FloatField`` output.
    """
    entity = self.object
    # Match salaries from child employers as well as from the entity itself.
    belongs_to_entity = (
        Q(job__position__employer__parent=entity)
        | Q(job__position__employer=entity)
    )
    salaries = Salary.objects.filter(belongs_to_entity).all()
    aggregated = salaries.aggregate(
        median=Percentile('amount', 0.5, output_field=FloatField()),
    )
    return aggregated['median']
def get_queryset(self):
    """Return delay statistics for two airports, optionally for one carrier.

    Reads ``airport1``, ``airport2`` and ``carrier`` from the request's
    query string. Rows are kept when ``airport_code`` matches either
    airport; when ``carrier`` is supplied, rows are further restricted to
    that ``carrier_code``.

    Returns:
        A values-queryset annotated with the mean, median and standard
        deviation of the carrier and late-aircraft delay minutes, or
        ``None`` when either airport parameter is missing or empty.
    """
    params = self.request.GET
    airport1 = params.get('airport1')
    airport2 = params.get('airport2')
    carrier = params.get('carrier')

    if not (airport1 and airport2):
        # Behaviour kept from the original, which fell through without a
        # return statement when both airports were not supplied.
        return None

    queryset = Data.objects.all().filter(
        Q(airport_code=airport1) | Q(airport_code=airport2)
    )
    if carrier:
        queryset = queryset.filter(carrier_code=carrier)

    carrier_delay = 'statistics_minutesdelayed_carrier'
    lateaircraft_delay = 'statistics_minutesdelayed_lateaircraft'

    # NOTE(review): values() before annotate() makes Django group by these
    # two columns, so every statistic is computed per distinct pair of
    # values rather than over the whole filtered set -- confirm intended.
    return queryset.values(
        carrier_delay,
        lateaircraft_delay,
    ).annotate(
        # The "carier" spelling is kept: these keys are read by consumers.
        mean_carier=Avg(carrier_delay),
        mean_lateaircraft=Avg(lateaircraft_delay),
        median_carier=Percentile(
            carrier_delay, 0.5, output_field=models.FloatField()),
        # Fixed: this median was previously computed from the carrier-delay
        # column instead of the late-aircraft column.
        median_lateaircraft=Percentile(
            lateaircraft_delay, 0.5, output_field=models.FloatField()),
        std_dev_carrier=StdDev(carrier_delay),
        std_dev_lateaircraft=StdDev(lateaircraft_delay),
    )
def employer_median_salaries(self):
    """Return median base, extra and total pay, computed once per instance.

    The aggregate is stored on ``self._employer_median_salaries`` on first
    use and returned from there on later calls.

    Returns:
        dict with keys ``median_base_pay``, ``median_extra_pay`` and
        ``median_total_pay``; each median is coalesced to 0 when NULL.
    """
    if hasattr(self, '_employer_median_salaries'):
        return self._employer_median_salaries

    def median_of(expression):
        # Every median shares the same percentile, salary filter and
        # output type; only the aggregated expression differs.
        return Percentile(
            expression,
            0.5,
            filter=self.salary_q,
            output_field=FloatField(),
        )

    base_pay = median_of('positions__jobs__salaries__amount')
    extra_pay = median_of('positions__jobs__salaries__extra_pay')
    # Total pay sums the two components (NULL counted as 0); a sum of 0 is
    # turned back into NULL by NullIf before the percentile is taken.
    total_pay = median_of(
        NullIf(
            Coalesce('positions__jobs__salaries__amount', 0)
            + Coalesce('positions__jobs__salaries__extra_pay', 0),
            0,
        )
    )

    self._employer_median_salaries = self.employer_queryset.aggregate(
        median_base_pay=Coalesce(base_pay, 0),
        median_extra_pay=Coalesce(extra_pay, 0),
        median_total_pay=Coalesce(total_pay, 0),
    )
    return self._employer_median_salaries
def label_timing(request, project_pk):
    """Collect per-labeler labeling-time percentiles for a project.

    Used by the graphs on the admin page to show how long each labeler is
    taking. The project creator and every profile found through the
    project's permission set are queried in turn; labelers whose aggregate
    comes back empty are left out.

    Args:
        request: The incoming request; not read by this function.
        project_pk: Primary key of the project.

    Returns:
        A ``Response`` wrapping ``{"data": [...], "yDomain": ...}``, where
        each ``data`` entry holds a labeler's label and the 5th, 25th,
        50th, 75th and 95th percentiles of ``time_to_label``.
    """
    project = Project.objects.get(pk=project_pk)
    labelers = [project.creator] + [
        perm.profile for perm in project.projectpermissions_set.all()
    ]

    series = []
    y_domain = 0
    for labeler in labelers:
        labels = DataLabel.objects.filter(
            data__project=project_pk, profile=labeler)
        stats = labels.aggregate(
            quartiles=Percentile(
                "time_to_label",
                [0.05, 0.25, 0.5, 0.75, 0.95],
                continuous=False,
                output_field=ArrayField(FloatField()),
            )
        )
        quartiles = stats["quartiles"]
        if not quartiles:
            continue

        upper_whisker = quartiles[4]
        # Leave 10 units of headroom above the tallest whisker seen so far.
        if upper_whisker > y_domain:
            y_domain = upper_whisker + 10

        series.append({
            "label": str(labeler),
            "values": {
                "Q1": quartiles[1],
                "Q2": quartiles[2],
                "Q3": quartiles[3],
                "whisker_low": quartiles[0],
                "whisker_high": upper_whisker,
            },
        })

    return Response({"data": series, "yDomain": y_domain})
def benefit_aggregates(self):
    """Return per-year, per-fund benefit medians and counts, cached.

    The result is kept in ``self._cache`` under ``'benefit_aggregates'``
    and reused on later calls.

    Returns:
        dict mapping each year in ``self.data_years`` to a dict keyed by
        fund name, whose values hold ``'median'`` and ``'count'`` as
        formatted by ``self._format_large_number``.
    """
    cached = self._cache.get('benefit_aggregates', None)
    if cached is not None:
        return cached

    # One row per (fund name, data year) with the median amount and count.
    rows = Benefit.objects.values('fund__name', 'data_year').annotate(
        median=Percentile('amount', 0.5, output_field=FloatField()),
        count=Count('id'),
    )

    by_year = {
        year: {
            row['fund__name']: {
                'median': self._format_large_number(row['median']),
                'count': self._format_large_number(row['count']),
            }
            for row in rows
            if row['data_year'] == year
        }
        for year in self.data_years
    }

    self._cache['benefit_aggregates'] = by_year
    return by_year
def NearMax(field):
    """Build a DB statistical function for almost the maximum, but not quite.

    Args:
        field: The field (or expression) handed to ``Percentile``.

    Returns:
        A ``Percentile`` aggregate at 0.95 with a ``FloatField`` output.
    """
    near_max_fraction = 0.95
    return Percentile(field, near_max_fraction, output_field=FloatField())