def _build_comparison_request(self):
    """
    Builds the comparison subrequest for the first data subrequest of ``self.request``.

    :return: Result of calling ``to_native()`` on the built comparison subrequest
    """
    manager = BlendingTaxonManager(self.request)
    manager.load_all_used_taxons(SNOWFLAKE_HUSKY_CONTEXT)

    first_subrequest = self.request.data_subrequests[0]
    built_subrequest = ComparisonRequestBuilder._build_comparison_subrequest(
        first_subrequest, self.request.comparison, manager)
    return built_subrequest.to_native()
def test_get_subrequest_taxons_in_preagg_filter(mock__get_taxons):
    """
    The taxon used in the subrequest's own preaggregation filter is expected among the
    subrequest taxons, while the taxon used only in the scope filter is not.

    :param mock__get_taxons: presumably injected by a patch decorator that is not
        visible here; it is not used directly in the test body
    """
    # Filter set directly on the subrequest.
    subrequest_filter = {
        'type': 'taxon_value',
        'taxon': 'company_id',
        'operator': '=',
        'value': '57',
    }
    # Filter set on the scope of the subrequest.
    scope_filter = {
        'type': 'taxon_value',
        'taxon': 'account_id',
        'operator': '=',
        'value': '57',
    }
    subrequest_payload = {
        'taxons': ['spend'],
        'preaggregation_filters': subrequest_filter,
        'scope': {'preaggregation_filters': scope_filter},
        'properties': {'data_sources': ['facebook_ads']},
        'origin': {'system': 'test-case'},
    }
    blending_request = BlendingDataRequest({
        'data_subrequests': [subrequest_payload],
        'taxons': ['spend'],
        'origin': {'system': 'test-case'},
    })

    taxon_manager = BlendingTaxonManager(blending_request)
    taxon_manager.load_all_used_taxons(SNOWFLAKE_HUSKY_CONTEXT)

    found_slugs = taxon_manager.get_subrequest_taxons(
        blending_request.data_subrequests[0])
    assert sorted(found_slugs) == ['company_id', 'spend']
示例#3
0
    def _build_data_blend_query(
        cls,
        ctx: HuskyQueryContext,
        taxon_manager: BlendingTaxonManager,
        config_arg: BlendingDataRequest,
        query_info: BlendingQueryInfo,
    ) -> Dataframe:
        """
        Creates one dataframe per data subrequest and blends them all into one dataframe.

        :param ctx: Husky query context
        :param taxon_manager: Provides the subrequest taxons, the used taxons and the plan templates
        :param config_arg: Blending request; a clone of it is modified, not the passed instance
        :param query_info: Receives one QueryInfo entry per data subrequest
        :return: Dataframe created by blending the dataframes of all subrequests
        """
        # Work on a clone, because the subrequests are modified below.
        blending_request = BlendingDataRequest(config_arg.to_native())

        subrequest_dataframes = []
        for data_subrequest in blending_request.data_subrequests:
            # Add comparison taxons to the data subrequest taxons.
            data_subrequest.taxons = taxon_manager.get_subrequest_taxons(
                data_subrequest)

            subrequest_info = QueryInfo(
                {'used_raw_taxons': data_subrequest.taxons})
            query_info.subrequests_info.append(subrequest_info)

            # Templates planned for the data source of this subrequest.
            source_name = data_subrequest.properties.data_source
            plan = taxon_manager.plan
            formula_templates = plan.data_source_formula_templates[source_name]
            source_filter_templates = plan.data_source_filter_templates[
                source_name]

            # Build the query for the subrequest and collect its dataframe.
            subrequest_dataframes.append(
                MainQueryBuilder.build_query(
                    ctx,
                    data_subrequest.to_internal_model(),
                    subrequest_info,
                    taxon_manager.used_taxons,
                    formula_templates,
                    filter_templates=source_filter_templates,
                    allowed_physical_data_sources=(
                        set(blending_request.physical_data_sources)
                        if blending_request.physical_data_sources else None),
                ))

        return blend_dataframes(
            ctx,
            subrequest_dataframes,
            taxon_manager.plan.data_source_formula_templates,
        )
    def _build_comparison_subrequest(
            cls, original_subrequest: ApiDataRequest,
            comparison: ComparisonConfig,
            taxon_manager: BlendingTaxonManager) -> InternalDataRequest:
        """
        Derives the comparison subrequest from a data subrequest.

        The internal model of the original subrequest gets its filters, limit and ordering
        reset and its taxons replaced by the sorted raw taxons needed for the comparison.
        For company scoped comparisons, a company id filter is set on the scope.

        :param original_subrequest: Data subrequest the comparison subrequest is based on
        :param comparison: Comparison config of the blending request
        :param taxon_manager: Provides the raw taxons needed by the comparison subrequest
        :return: Internal data request to be used for the comparison query
        """
        comparison_subrequest: InternalDataRequest = (
            original_subrequest.to_internal_model())

        # Comparison can only be filtered by project filters or company id, so drop all filters.
        comparison_subrequest.preaggregation_filters = None

        # Limit and order by do not make sense for comparison.
        comparison_subrequest.limit = None
        comparison_subrequest.order_by = []

        # Taxon slugs needed for the comparison subrequest.
        raw_taxon_slugs = taxon_manager.get_comparison_subrequest_raw_taxons(
            comparison_subrequest, comparison)
        comparison_subrequest.taxons = sorted(raw_taxon_slugs)

        if comparison.scope == ComparisonScopeType.company:
            # For company scope, we filter on the company id and remove project filters and accounts.
            # Eventually, we could fetch the list of all accounts under a company and filter on that,
            # since that will probably be faster.
            company_filter = {
                'type': FilterClauseType.TAXON_VALUE.value,
                'taxon': 'company_id',
                'operator': SimpleFilterOperator.EQ.value,
                'value': comparison_subrequest.scope.company_id,
            }
            comparison_subrequest.scope.preaggregation_filters = (
                TaxonValueFilterClause(company_filter))

        return comparison_subrequest
 def test_fb_tw_merged_objective_and_generic_cpm(self, mock__get_taxons):
     """
     Verifies the plan built for ``self._blending_request``: the per data source formula
     templates, the dimension formulas and the pre/post metric formulas.
     """
     preprocess_request(self._blending_request)
     manager = BlendingTaxonManager(self._blending_request)
     manager.load_all_used_taxons(SNOWFLAKE_HUSKY_CONTEXT)
     tel_plan = manager.plan

     # Expected templates, one per data source.
     facebook_template = SqlFormulaTemplate(
         SqlTemplate('''${facebook_ads|objective}'''),
         '''__fb_tw_merged_objective1''',
         'facebook_ads',
         {'facebook_ads|objective'},
     )
     twitter_template = SqlFormulaTemplate(
         SqlTemplate('''${twitter|objective}'''),
         '''__fb_tw_merged_objective2''',
         'twitter',
         {'twitter|objective'},
     )
     assert tel_plan.data_source_formula_templates == {
         'facebook_ads': [facebook_template],
         'twitter': [twitter_template],
     }

     # Formulas are compared via their repr.
     expected_dimension_formulas = [
         PreFormula(
             '''coalesce(__fb_tw_merged_objective1, __fb_tw_merged_objective2)''',
             '''fb_tw_merged_objective''',
             AggregationDefinition(type=AggregationType.not_set),
             None,
         ),
     ]
     assert [repr(formula) for formula in tel_plan.dimension_formulas] == [
         repr(formula) for formula in expected_dimension_formulas
     ]

     expected_metric_pre = [
         PreFormula(
             '''fb_tw_merged_objective''',
             '''fb_tw_merged_objective''',
             AggregationDefinition(type=AggregationType.group_by),
             None,
         ),
         PreFormula(
             '''1000 * (coalesce(facebook_ads_spend_5811c78c7c741b5a, 0) + coalesce(twitter_spend_68657fbb141b10c8, 0))''',
             '''__generic_cpm1''',
             AggregationDefinition(type=AggregationType.sum),
             None,
         ),
         PreFormula(
             '''coalesce(facebook_ads_impressions_0bf2e36fb4e71190, 0) + coalesce(twitter_impressions_ef12a84724a0ad7d, 0)''',
             '''__generic_cpm2''',
             AggregationDefinition(type=AggregationType.sum),
             None,
         ),
     ]
     assert [repr(formula) for formula in tel_plan.metric_pre] == [
         repr(formula) for formula in expected_metric_pre
     ]

     # Post formulas are paired with the mocked taxon they belong to.
     expected_merge_taxon = get_mocked_taxons_by_slug(
         ['fb_tw_merged_objective'])[0]
     expected_cpm_taxon = get_mocked_taxons_by_slug(['generic_cpm'])[0]
     expected_metric_post = [
         (
             PostFormula('fb_tw_merged_objective', 'fb_tw_merged_objective'),
             expected_merge_taxon,
         ),
         (
             PostFormula(
                 '__generic_cpm1 / nullif(__generic_cpm2, 0)',
                 '__generic_cpm1 / nullif(__generic_cpm2, 0)'),
             expected_cpm_taxon,
         ),
     ]
     assert [repr(pair) for pair in tel_plan.metric_post] == [
         repr(pair) for pair in expected_metric_post
     ]
    def _build_comparison_blend_query(
        cls,
        ctx: HuskyQueryContext,
        config_arg: BlendingDataRequest,
        taxon_manager: BlendingTaxonManager,
        query_info: BlendingQueryInfo,
        allowed_physical_data_sources: Optional[Set[str]] = None,
    ) -> Optional[Dataframe]:
        """
        Builds a comparison query for each subrequest and blends them all into one comparison dataframe.

        :param ctx: Husky query context
        :param config_arg: Blending request; a clone of it is used, not the passed instance
        :param taxon_manager: Provides the taxon map, the used taxons and the comparison plan templates
        :param query_info: Receives one QueryInfo entry per created comparison subrequest
        :param allowed_physical_data_sources: Passed through to the query builder of each subrequest
        :return: Comparison dataframe with renamed columns, or None when no comparison subrequest
            had any taxons
        :raises AssertionError: When the request has no comparison defined (assert statements are
            skipped when Python runs with -O)
        """
        # Work on a clone, because the subrequests are modified along the way.
        blending_request = BlendingDataRequest(config_arg.to_native())
        assert blending_request.comparison, 'Comparison must be defined when trying to build comparison query..'
        comparison_config: ComparisonConfig = blending_request.comparison

        comparison_dataframes = []
        for data_subrequest in blending_request.data_subrequests:
            comparison_subrequest = cls._build_comparison_subrequest(
                data_subrequest, comparison_config, taxon_manager)
            source_name = comparison_subrequest.properties.data_source

            # Without comparison taxons, there is no comparison query to create for this subrequest.
            if len(comparison_subrequest.taxons) == 0:
                continue

            comparison_info = QueryInfo.create(comparison_subrequest)
            query_info.comparison_subrequests_info.append(comparison_info)

            # TODO pass down TelPlan for comparisons
            # ComparisonRequestBuilder might have added filters (typically for company id project id),
            # so we create new filter templates for this comparison subrequest.
            subrequest_filters = [
                comparison_subrequest.preaggregation_filters,
                comparison_subrequest.scope.preaggregation_filters,
            ]
            comparison_filter_templates = TelPlanner.get_preaggregation_filter_templates(
                ctx,
                subrequest_filters,
                taxon_manager.taxon_map,
                source_name,
            )

            # Build the comparison dataframe and collect it.
            comparison_df = QueryBuilder.build_query(
                ctx,
                comparison_subrequest,
                comparison_info,
                taxon_manager.used_taxons,
                dimension_templates=taxon_manager.plan.
                comparison_data_source_formula_templates[source_name],
                filter_templates=comparison_filter_templates,
                allowed_physical_data_sources=allowed_physical_data_sources,
            )
            comparison_dataframes.append(comparison_df)

        # Nothing to blend when no comparison subrequest was created.
        if not comparison_dataframes:
            return None

        # Blend all comparison dataframes into one.
        # TODO pass down TelPlan for comparisons
        comparison_templates = taxon_manager.plan.comparison_data_source_formula_templates
        blended = blend_dataframes(ctx, comparison_dataframes,
                                   comparison_templates)

        # Prefix all comparison metric columns with 'comparison@' and create comparison taxon for it.
        blended_query = blended.query
        selected_columns = []
        columns_by_slug: Dict[TaxonExpressionStr, DataframeColumn] = {}
        for slug, column in blended.slug_to_column.items():
            if column.taxon.is_dimension:
                # Dimensions are selected under their original slug.
                renamed_taxon = column.taxon.copy(deep=True)
                renamed_slug = TaxonExpressionStr(f'{slug}')
            else:
                # Metrics are aliased to their comparison taxon.
                renamed_slug, renamed_taxon = BlendingTaxonManager.create_comparison_taxon(
                    column.taxon)

            source_column = blended_query.c[safe_identifier(slug)]
            selected_columns.append(
                source_column.label(renamed_taxon.slug_safe_sql_identifier))
            columns_by_slug[renamed_slug] = DataframeColumn(
                renamed_slug, renamed_taxon, column.quantity_type)

        # Also select the dim columns from the dim templates.
        for templates in comparison_templates.values():
            selected_columns.extend(
                literal_column(quote_identifier(template.label, ctx.dialect))
                for template in templates)

        final_query = select(sort_columns(selected_columns)).select_from(
            blended.query)
        return Dataframe(
            final_query,
            columns_by_slug,
            blended.used_model_names,
            blended.used_physical_data_sources,
        )
    def _build_query(cls, ctx: HuskyQueryContext, request: BlendingDataRequest,
                     query_info: BlendingQueryInfo) -> Dataframe:
        """
        Builds the final dataframe for a blending request.

        1. preprocess the request to make it backward compatible
        2. blend data df from subrequests
        3. build comparison df from subrequests (suggest comparison taxons, if needed and possible)
        4. data left join comparisons
        5. group by request dimensions

        :param ctx: Husky query context
        :param request: Blending request to build the query for
        :param query_info: Query info passed down to the data and comparison query builders
        :return: Dataframe projected to the taxons that should be returned
        :raises InvalidComparisonRequest: When a comparison is requested but its taxons are None
        """
        cls._preprocess_request(request)

        taxon_manager = BlendingTaxonManager(request)
        taxon_manager.load_all_used_taxons(ctx)

        # The company id is read from the scope of the first subrequest.
        first_scope = request.data_subrequests[0].scope
        override_mapping_manager = OverrideMappingManager.initialize(
            first_scope.company_id,
            taxon_manager.plan.override_mappings,
            taxon_manager.plan.comparison_override_mappings,
        )

        # Build data df
        blended_data_df = QueryBuilder._build_data_blend_query(
            ctx, taxon_manager, request, query_info)
        data_df = DimensionPhaseBuilder.calculate_dataframe(
            taxon_manager.plan.dimension_formulas,
            override_mapping_manager.override_mapping_tel_data,
            override_mapping_manager.cte_map,
            blended_data_df,
        )

        # Without a (non-empty) comparison df, the data df is used as is.
        blended_df = data_df
        if request.comparison is not None:
            if request.comparison.taxons is None:
                raise InvalidComparisonRequest()

            # Build comparison df and join it to the data df
            comparison_df = ComparisonRequestBuilder.build_comparison_query(
                ctx,
                request,
                taxon_manager,
                override_mapping_manager,
                query_info,
            )
            # Joining only makes sense when the comparison df has some taxons.
            if comparison_df and comparison_df.slug_to_column:
                blended_df = left_join_dataframes(ctx, data_df, comparison_df,
                                                  taxon_manager.plan)

        # Get all taxons to be returned
        return_taxons = taxon_manager.get_return_taxons()

        calculated_df = MetricPhaseBuilder(taxon_manager).calculate_dataframe(
            ctx,
            blended_df,
            request.grouping_sets,
            filter_clause=request.filters,
        )

        # Project them to the final df
        return ProjectionBuilder.project_dataframe(
            calculated_df,
            return_taxons,
            request.order_by,
            request.limit,
        )