def test_get_subrequest_taxons_in_preagg_filter(mock__get_taxons):
    """Taxons referenced by preaggregation filters must appear in the subrequest taxon set."""
    subrequest_def = {
        'taxons': ['spend'],
        # Filter directly on the subrequest — its taxon must be picked up.
        'preaggregation_filters': {
            'type': 'taxon_value',
            'taxon': 'company_id',
            'operator': '=',
            'value': '57',
        },
        # Filter inside the scope — its taxon is NOT part of the expected result.
        'scope': {
            'preaggregation_filters': {
                'type': 'taxon_value',
                'taxon': 'account_id',
                'operator': '=',
                'value': '57',
            },
        },
        'properties': {'data_sources': ['facebook_ads']},
        'origin': {'system': 'test-case'},
    }
    request = BlendingDataRequest({
        'data_subrequests': [subrequest_def],
        'taxons': ['spend'],
        'origin': {'system': 'test-case'},
    })

    taxon_manager = BlendingTaxonManager(request)
    taxon_manager.load_all_used_taxons(SNOWFLAKE_HUSKY_CONTEXT)

    actual_slugs = taxon_manager.get_subrequest_taxons(request.data_subrequests[0])
    assert sorted(actual_slugs) == ['company_id', 'spend']
 def setUp(self) -> None:
     """Prepare a blending request with a grouped preaggregation filter and a company-scope comparison."""
     super().setUp()
     # Single-clause AND group filtering on one account id.
     preagg_filter = {
         'type': 'group',
         'logical_operator': 'AND',
         'clauses': [{
             'type': 'taxon_value',
             'taxon': 'account_id',
             'operator': '=',
             'value': '595126134331606',
         }],
     }
     comparison = ComparisonConfig({
         'scope': ComparisonScopeType.company.value,
         'taxons': ['objective'],
     }).to_native()
     self.request = BlendingDataRequest({
         'data_subrequests': [{
             'scope': {
                 'company_id': '50',
                 'preaggregation_filters': preagg_filter,
             },
             'properties': {'data_sources': ['facebook_ads']},
             'taxons': ['account_id', 'spend', 'cpm'],
         }],
         'comparison': comparison,
     })
# Example 3
    def _build_data_blend_query(
        cls,
        ctx: HuskyQueryContext,
        taxon_manager: BlendingTaxonManager,
        config_arg: BlendingDataRequest,
        query_info: BlendingQueryInfo,
    ) -> Dataframe:
        """
        Builds one dataframe per subrequest, then blends them all into a single dataframe.

        :param ctx: Husky query context
        :param taxon_manager: Taxon manager holding the blending plan
        :param config_arg: Original blending request; a clone is modified, the argument stays untouched
        :param query_info: Query info extended with per-subrequest info

        :return: Blended dataframe
        """
        # Clone the request, since subrequest taxons are rewritten below.
        request = BlendingDataRequest(config_arg.to_native())
        allowed_sources = (
            set(request.physical_data_sources) if request.physical_data_sources else None
        )

        subquery_dataframes = []
        for subrequest in request.data_subrequests:
            # Extend subrequest taxons with comparison taxons.
            subrequest.taxons = taxon_manager.get_subrequest_taxons(subrequest)
            sub_query_info = QueryInfo({'used_raw_taxons': subrequest.taxons})
            query_info.subrequests_info.append(sub_query_info)

            # Build the query for this subrequest and collect its dataframe.
            data_source = subrequest.properties.data_source
            subquery_dataframes.append(
                MainQueryBuilder.build_query(
                    ctx,
                    subrequest.to_internal_model(),
                    sub_query_info,
                    taxon_manager.used_taxons,
                    taxon_manager.plan.data_source_formula_templates[data_source],
                    filter_templates=taxon_manager.plan.data_source_filter_templates[data_source],
                    allowed_physical_data_sources=allowed_sources,
                ))

        return blend_dataframes(
            ctx, subquery_dataframes, taxon_manager.plan.data_source_formula_templates)
# Example 4
    def compile_transformation_request(cls, req: TransformRequest, company_id: str) -> Tuple[str, HuskyQueryRuntime]:
        """
        Compiles a transform request into its SQL representation.

        :param req: Input transform request
        :param company_id: Company ID

        :return: Tuple of (compiled SQL, dialect runtime)
        """
        # Describe where this request originated.
        origin = DataRequestOrigin({
            'system': 'FDQ',
            'extra': {
                'purpose': 'taxonomy.transform.compile',
            },
        })

        # Resolve all taxons used by the request (sorted for deterministic lookup).
        used_taxons_map = fetch_all_used_taxons_map(company_id, sorted(req.requested_fields))

        # Set of all virtual data sources covered by the used taxons.
        used_vds = {taxon.data_source for taxon in used_taxons_map.values() if taxon.data_source}

        # One subrequest per virtual data source, so Husky can push the
        # taxons into the relevant subrequests.
        subrequests = [
            ApiDataRequest({'scope': {'company_id': company_id}, 'properties': {'data_sources': [vds]}})
            for vds in sorted(used_vds)
        ]

        # Assemble the final blending request.
        husky_request = BlendingDataRequest({
            'data_subrequests': subrequests,
            'taxons': req.requested_fields,
            'origin': origin,
        })

        connection = Connection.get()
        query_runtime = EnumHelper.from_value_safe(HuskyQueryRuntime, Connection.get_dialect_name(connection))
        context = HuskyQueryContext(query_runtime)

        husky_dataframe = QueryBuilder.validate_data_request(context, husky_request)

        # Wrap in one more query layer so the output columns carry the requested names.
        final_query = cls._correct_column_aliases(context, husky_dataframe)

        return compile_query(final_query, context.dialect), context.query_runtime
# Example 5
def preprocess_request(req: BlendingDataRequest):
    """
    Mutates the request in place to stay backward compatible with older clients.
    """
    # Hoist per-subrequest order_by clauses up to the top-level request.
    for subrequest in req.data_subrequests:
        if not subrequest.order_by:
            continue
        req.order_by.extend(subrequest.order_by)
        subrequest.order_by = []

    # Taxons referenced by grouping sets must reach every subrequest,
    # otherwise computed dimensions would be missing from the result.
    grouping_set_taxons = {
        taxon
        for grouping_set in (req.grouping_sets or [])
        for taxon in (grouping_set or [])
    }
    req.taxons = req.taxons or []
    req.taxons.extend(grouping_set_taxons)
    move_top_level_to_subrequests(req.taxons, req.data_subrequests)

    # These two features are mutually exclusive.
    if req.grouping_sets and req.fill_date_gaps:
        raise InvalidRequest('request.fill_date_gaps', 'fill_date_gaps is not supported when used with grouping sets.')
 def setUp(self) -> None:
     """Prepare a two-source blending request (twitter + facebook_ads) and its query info."""
     super().setUp()
     self._twitter_acc_id = 'acc_id_tw123'
     self._fb_acc_id = 'acc_id_456'

     def account_scope(account_id):
         # Scope with a preaggregation filter pinning a subrequest to one account.
         return {
             'preaggregation_filters': {
                 'type': 'taxon_value',
                 'taxon': 'account_id',
                 'value': account_id,
                 'operator': '=',
             }
         }

     self._blending_request = BlendingDataRequest({
         'data_subrequests': [
             {
                 'scope': account_scope(self._twitter_acc_id),
                 'properties': {'data_sources': ['twitter']},
             },
             {
                 'scope': account_scope(self._fb_acc_id),
                 'properties': {'data_sources': ['facebook_ads']},
             },
         ],
         'taxons': ['fb_tw_merged_objective', 'generic_cpm'],
         'limit': 100,
     })
     self._info = BlendingQueryInfo.create(self._blending_request, SNOWFLAKE_HUSKY_CONTEXT)
    def build_query(
            cls,
            ctx: HuskyQueryContext,
            req: BlendingDataRequest,
            query_info: Optional[BlendingQueryInfo] = None) -> Dataframe:
        """
        Builds the blended query.

        Adding suggested comparison taxons (if desired, but missing)
        - attempt to use provided rules and generate the query using all taxons from the matched rule
        - if it fails, fall back to using only taxon Data Source as comparison taxon

        :param ctx: Husky query context
        :param req: Original request from API
        :param query_info: Optional query info structure; created from the request when omitted

        :return: Generated blended data frame
        """
        if query_info is None:
            query_info = BlendingQueryInfo.create(req, ctx)

        # Capture the request exactly as the client sent it, before any mutation.
        query_info.original_request_str = json.dumps(req.to_primitive())

        return cls._build_query(ctx, req, query_info)
    def _build_comparison_blend_query(
        cls,
        ctx: HuskyQueryContext,
        config_arg: BlendingDataRequest,
        taxon_manager: BlendingTaxonManager,
        query_info: BlendingQueryInfo,
        allowed_physical_data_sources: Optional[Set[str]] = None,
    ) -> Optional[Dataframe]:
        """
        Builds comparison query for each subrequest and then blends them all into one comparison dataframe.

        Metric columns of the blended result are re-aliased with a comparison prefix
        (via BlendingTaxonManager.create_comparison_taxon); dimension columns keep their slugs.

        :param ctx: Husky query context
        :param config_arg: Original blending request; a clone is modified, the argument stays untouched
        :param taxon_manager: Taxon manager holding the blending plan and taxon map
        :param query_info: Query info extended with per-subrequest comparison info
        :param allowed_physical_data_sources: Optional allow-list passed down to subquery builds

        :return: Blended comparison dataframe, or None when no comparison subrequest was produced
        """
        dataframes = []
        config = BlendingDataRequest(config_arg.to_native(
        ))  # Clone, coz we will be modifying subqueries
        assert config.comparison, 'Comparison must be defined when trying to build comparison query..'
        comparison: ComparisonConfig = config.comparison
        for _subrequest in config.data_subrequests:
            subrequest = cls._build_comparison_subrequest(
                _subrequest, comparison, taxon_manager)
            data_source = subrequest.properties.data_source

            # if no comparison taxons were found for this subrequest, skip creating comparison query for it as well
            if len(subrequest.taxons) == 0:
                continue

            bm_sub_query_info = QueryInfo.create(subrequest)
            query_info.comparison_subrequests_info.append(bm_sub_query_info)
            # Build comparison dataframe and add it to a list.
            # TODO pass down TelPlan for comparisons
            # ComparisonRequestBuilder might have added filters (typically for company id / project id),
            # so we create new filter templates for this comparison subrequest.
            filter_templates = TelPlanner.get_preaggregation_filter_templates(
                ctx,
                [
                    subrequest.preaggregation_filters,
                    subrequest.scope.preaggregation_filters
                ],
                taxon_manager.taxon_map,
                data_source,
            )

            dataframes.append(
                QueryBuilder.build_query(
                    ctx,
                    subrequest,
                    bm_sub_query_info,
                    taxon_manager.used_taxons,
                    dimension_templates=taxon_manager.plan.
                    comparison_data_source_formula_templates[data_source],
                    filter_templates=filter_templates,
                    allowed_physical_data_sources=allowed_physical_data_sources,
                ))

        # if no comparison subrequests were created, there is no need to blend data frames
        if len(dataframes) == 0:
            return None

        # Blend all comparison dataframes into one
        # TODO pass down TelPlan for comparisons
        data_source_formula_templates = taxon_manager.plan.comparison_data_source_formula_templates
        dataframe = blend_dataframes(ctx, dataframes,
                                     data_source_formula_templates)

        # Prefix all comparison metric columns with 'comparison@' and create comparison taxon for it.
        query = dataframe.query
        final_columns = []
        aliased_taxon_by_slug: Dict[TaxonExpressionStr,
                                    DataframeColumn] = dict()
        for slug, df_column in dataframe.slug_to_column.items():
            # Alias metrics with comparison@ prefix, and select dimensions..
            if df_column.taxon.is_dimension:
                # Dimensions keep their slug; copy the taxon so the original is not mutated.
                new_taxon = df_column.taxon.copy(deep=True)
                new_slug = TaxonExpressionStr(f'{slug}')
            else:
                new_slug, new_taxon = BlendingTaxonManager.create_comparison_taxon(
                    df_column.taxon)

            final_columns.append(query.c[safe_identifier(slug)].label(
                new_taxon.slug_safe_sql_identifier))
            aliased_taxon_by_slug[new_slug] = DataframeColumn(
                new_slug, new_taxon, df_column.quantity_type)
        for pre_formulas in data_source_formula_templates.values():
            # and also select the dim columns from dim templates.
            for pre_formula in pre_formulas:
                final_columns.append(
                    literal_column(
                        quote_identifier(pre_formula.label, ctx.dialect)))
        # Wrap the blended query so the output columns carry the new aliases.
        renamed_cols_query = select(sort_columns(final_columns)).select_from(
            dataframe.query)
        return Dataframe(renamed_cols_query, aliased_taxon_by_slug,
                         dataframe.used_model_names,
                         dataframe.used_physical_data_sources)