Example #1
    def __init__(
        self,
        fields,
        start_date,
        end_date,
        dimensions,
        dimension_values,
        granularity='all',
        query_client=None,
        compute_field_calculation=None,
    ):
        self.fields = fields
        self.start_date = start_date
        self.end_date = end_date
        self.dimension_values = dimension_values

        if not compute_field_calculation:
            from config.aggregation_rules import get_calculation_for_fields

            compute_field_calculation = get_calculation_for_fields

        # Build the params needed for ValidationRow
        intervals = [build_time_interval(self.start_date, self.end_date)]
        calculation = compute_field_calculation(fields)
        dimension_filter = GroupByQueryBuilder.build_dimension_filter(
            [dimension_values])
        super(FieldValidationRow, self).__init__(
            intervals,
            calculation,
            dimensions,
            dimension_filter,
            granularity,
            query_client,
            compute_field_calculation,
        )
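A hypothetical instantiation of the constructor above might look like the following; every value (the field ID, dates, dimension name, and dimension value) is invented for illustration and not taken from the project.

import datetime

row = FieldValidationRow(
    fields=['malaria_cases'],                   # assumed field ID
    start_date=datetime.date(2020, 1, 1),
    end_date=datetime.date(2020, 2, 1),
    dimensions=['RegionName'],                  # assumed dimension name
    dimension_values={'RegionName': 'Amhara'},  # assumed dimension value
)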
Example #2
def get_fiscal_granularity_intervals(fiscal_start_month, start_date, end_date,
                                     granularity):
    '''Generate a list of druid date intervals that encompass the specified dates for
    the given granularity.'''
    intervals = []
    if start_date >= end_date:
        return intervals

    buckets = _build_month_buckets(fiscal_start_month, granularity)
    idx = find_bucket_index(start_date.month, buckets)

    start_month = buckets[idx]
    start_year = start_date.year
    # Handle when the start bucket pulls us into the previous calendar year
    if start_month > start_date.month:
        start_year -= 1
    interval_start = datetime.date(start_year, start_month, 1)

    end_year = start_year
    while interval_start < end_date:
        # Treat buckets like a circular list
        idx = (idx + 1) % len(buckets)
        end_month = buckets[idx]

        # Handle when the next bucket is in the next calendar year
        if end_month <= start_month:
            end_year += 1
        interval_end = datetime.date(end_year, end_month, 1)

        interval = build_time_interval(interval_start, interval_end)
        intervals.append(interval)
        interval_start = interval_end
        start_month = end_month

    return intervals
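Examples #2 and #3 both lean on helpers that are not shown on this page: _build_month_buckets (example #2), GRANULARITY_BUCKETS (example #3), and find_bucket_index. A minimal sketch of what the example #2 helpers might look like, assuming each bucket is identified by the 1-based month that starts it, is shown below; the project's real implementations may differ.

# Illustrative sketches only; not the project's actual helpers.
def _build_month_buckets(fiscal_start_month, granularity):
    # Sorted 1-based start months of each bucket; e.g. quarters with a July
    # fiscal start produce [1, 4, 7, 10]. Granularity names are assumed.
    months_per_bucket = {'month': 1, 'quarter': 3, 'year': 12}[granularity]
    return sorted(
        (fiscal_start_month - 1 + offset) % 12 + 1
        for offset in range(0, 12, months_per_bucket)
    )


def find_bucket_index(month, buckets):
    # Index of the bucket whose start month is the largest one <= month,
    # wrapping to the last bucket when month precedes the first start.
    for idx in reversed(range(len(buckets))):
        if buckets[idx] <= month:
            return idx
    return len(buckets) - 1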
Example #3
def get_buckets(start_date, end_date, granularity):
    intervals = []
    if start_date >= end_date:
        return intervals

    (et_start_year, et_start_month,
     _) = EthiopianDateConverter.date_to_ethiopian(start_date)

    buckets = GRANULARITY_BUCKETS[granularity]
    idx = find_bucket_index(et_start_month, buckets)

    start_month = buckets[idx]
    start_year = et_start_year
    # Handle when the start bucket pulls us into the previous calendar year
    if start_month > et_start_month:
        start_year -= 1
    interval_start = _to_gregorian_datetime(start_year, start_month, 1)

    end_year = start_year
    while interval_start < end_date:
        # Treat buckets like a circular list
        idx = (idx + 1) % len(buckets)
        end_month = buckets[idx]

        # Handle when the next bucket is in the next calendar year
        if end_month <= start_month:
            end_year += 1
        interval_end = _to_gregorian_datetime(end_year, end_month, 1)

        interval = build_time_interval(interval_start, interval_end)
        intervals.append(interval)
        interval_start = interval_end
        start_month = end_month

    return intervals
Example #4
    def get_full_time_interval(self):
        """Return a Druid date interval string covering the full range of data,
        [minTime.date, maxTime.date + 1 day), based on the time_boundary query.
        """
        event = self.get_data_time_boundary()
        mintime = _datetime_from_iso(event['result']['minTime'])
        maxtime = _datetime_from_iso(event['result']['maxTime'])
        maxtime = maxtime + timedelta(days=1)
        return build_time_interval(mintime, maxtime)
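Every example on this page ultimately calls build_time_interval. Its implementation is not shown here, but Druid expects intervals as ISO-8601 "<start>/<end>" strings, so a plausible sketch (an assumption, not the project's actual helper) is:

# Assumed behavior: format two dates as a Druid ISO-8601 interval string.
def build_time_interval(start, end):
    return '%s/%s' % (start.isoformat(), end.isoformat())

# build_time_interval(datetime.date(2020, 1, 1), datetime.date(2020, 2, 1))
# -> '2020-01-01/2020-02-01'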
Example #5
File: base.py Project: vedantr/Harmony
    def __init__(self, request, query_client):
        # $ConfigImportHack
        # HACK(vedant) - Because we want to completely break the dependence on the
        # configuration import hack, we need to reference values via a dynamic import
        self.dimension_slices = current_app.zen_config.aggregation.DIMENSION_SLICES
        self.get_calculation_for_fields = (
            current_app.zen_config.aggregation_rules.get_calculation_for_fields
        )
        self.get_granularity_for_interval = (
            current_app.zen_config.aggregation_rules.get_granularity_for_interval
        )
        self.datasource = current_app.druid_context.current_datasource

        # HACK(stephen):
        # Support legacy behavior where only a single dimension is
        # requested by the frontend. If this happens, we need to
        # grab all the dimensions to group by from the config
        # TODO(stephen): Have frontend specify exact dimensions
        # TODO(stephen): Dimension IDs would be awesome to have in the
        # DB so that we can use TopN and Timeseries more
        dimensions = request['dimensions']
        if len(dimensions) == 1:
            dimension = dimensions[0]
            dimensions = list(self.dimension_slices.get(dimension, [dimension]))
        # END HACK(stephen)

        self.query_client = query_client

        self._dimensions = dimensions
        self._fields = request['fields']
        self._calculation = self.get_calculation_for_fields(self._fields)
        self._calculation.set_strict_null_fields(self._fields)

        (start_date, end_date) = _get_dates_from_request(request)
        self._granularity = self.get_granularity_for_interval(
            request['granularity'], start_date, end_date
        )
        self._interval = build_time_interval(start_date, end_date)

        # Query filter is optional
        self._query_filter = None
        requested_filter = request.get('queryFilter', {})
        if requested_filter:
            self._query_filter = build_filter_from_dict(requested_filter)

        self._value_groups = request.get('valueGroups', {})
        self._validate_value_groups(self._value_groups)

        # The list of numeric fields to build results for. Defaults to all
        # requested fields + all computed fields
        self._numeric_fields = []
        for field in self._fields + list(self._value_groups.keys()):
            # TODO(stephen): Would love this to be an ordered set. It is
            # a list for now since we want to preserve requested field order.
            if field not in self._numeric_fields:
                self._numeric_fields.append(field)

        # TODO(stephen): Do something with result filter. It is an optimization
        # for dashboards that allows results to be fully filtered and reduced
        # before hitting the frontend.
        self._result_filter = request.get('resultFilter')
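The single-dimension hack in example #5 expands a lone requested dimension into the full list configured for it in DIMENSION_SLICES. With a hypothetical mapping (the hierarchy below is invented for illustration), the expansion behaves like this:

# Hypothetical config; the real mapping comes from zen_config.aggregation.
DIMENSION_SLICES = {
    'WoredaName': ['RegionName', 'ZoneName', 'WoredaName'],
}

dimensions = ['WoredaName']
if len(dimensions) == 1:
    dimension = dimensions[0]
    dimensions = list(DIMENSION_SLICES.get(dimension, [dimension]))

# dimensions is now ['RegionName', 'ZoneName', 'WoredaName']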
Example #6
    def to_druid_intervals(self):
        if self.all_time:
            return []

        intervals = self._get_tree_intervals()
        return [build_time_interval(*i) for i in intervals]
Example #7
    def run_query(self):
        '''
        Constructs and runs the Druid request for this query. The query is
        blocking.
        '''

        LOG.info('Running query...')

        # Filter the dimensions using the location filters passed in
        dimension_filter = GroupByQueryBuilder.build_dimension_filter(
            self.location_filters
        )

        # AND the selected locations with the non-location filters requested
        dimension_filter &= self.non_hierarchical_filter

        # Slice by the selected granularity + all fields less specific than it.
        # For example, if the user makes a Woreda query, we also want to slice
        # by Zone and Region.
        if self.geo_field:
            # Restrict query to non-null for the given geo
            dimension_filter &= Dimension(self.geo_field) != ''

            # Set the appropriate dimensions for this query
            self.druid_slice_dimensions = self.get_slice_dimensions()
            if self.latitude_field and self.longitude_field:
                self.druid_geo_dimensions = [self.latitude_field, self.longitude_field]

        grouping_fields = self.druid_slice_dimensions + self.druid_geo_dimensions

        batches = []
        overall_interval = build_time_interval(self.start_date, self.end_date)
        for selected_granularity in self.selected_granularities:
            # Druid expects time intervals as a list
            intervals = [overall_interval]
            granularity = current_app.zen_config.aggregation_rules.get_granularity_for_interval(
                selected_granularity, self.start_date, self.end_date
            )

            query = GroupByQueryBuilder(
                datasource=current_app.druid_context.current_datasource.name,
                granularity=granularity,
                grouping_fields=grouping_fields,
                intervals=intervals,
                calculation=self.calculation,
                dimension_filter=dimension_filter,
            )

            batch = QueryBatch(
                query,
                selected_granularity,
                self.geo_field,
                self.latitude_field,
                self.longitude_field,
                self.ordered_fields,
                self.denom,
                self.druid_slice_dimensions,
                self.query_client,
            )
            batches.append(batch)

        num_granularities = len(self.selected_granularities)
        if USE_THREAD_POOL and num_granularities > 1:
            pool = ThreadPool(num_granularities)
            pool.map(QueryBatch.run, batches)
            pool.close()
            pool.join()
        else:
            for batch in batches:
                batch.run()

        self.batches = batches
        return True
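The fan-out at the end of run_query maps QueryBatch.run over the batches with a thread pool. Below is a self-contained sketch of the same pattern, assuming ThreadPool is multiprocessing.pool.ThreadPool, with QueryBatch replaced by a stand-in class so the snippet runs on its own.

from multiprocessing.pool import ThreadPool


class FakeBatch:
    # Stand-in for QueryBatch; only the run() method matters for the pattern.
    def __init__(self, granularity):
        self.granularity = granularity

    def run(self):
        print('running %s batch' % self.granularity)


batches = [FakeBatch('month'), FakeBatch('quarter')]
pool = ThreadPool(len(batches))
pool.map(FakeBatch.run, batches)  # calls batch.run() for each batch
pool.close()
pool.join()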