def test_get_subrequest_taxons_in_preagg_filter(mock__get_taxons):
    """Taxons referenced only from preaggregation filters must show up in subrequest taxons."""
    request_payload = {
        'data_subrequests': [
            {
                'taxons': ['spend'],
                'preaggregation_filters': {
                    'type': 'taxon_value',
                    'taxon': 'company_id',
                    'operator': '=',
                    'value': '57',
                },
                'scope': {
                    'preaggregation_filters': {
                        'type': 'taxon_value',
                        'taxon': 'account_id',
                        'operator': '=',
                        'value': '57',
                    },
                },
                'properties': {'data_sources': ['facebook_ads']},
                'origin': {'system': 'test-case'},
            }
        ],
        'taxons': ['spend'],
        'origin': {'system': 'test-case'},
    }
    request = BlendingDataRequest(request_payload)

    taxon_manager = BlendingTaxonManager(request)
    taxon_manager.load_all_used_taxons(SNOWFLAKE_HUSKY_CONTEXT)

    subrequest_slugs = taxon_manager.get_subrequest_taxons(request.data_subrequests[0])
    # 'company_id' comes from the request-level preaggregation filter; the scope-level
    # 'account_id' filter is not expected to be added here.
    assert sorted(subrequest_slugs) == ['company_id', 'spend']
def setUp(self) -> None:
    """Prepare a single-subrequest blending request with a company-scoped comparison."""
    super().setUp()

    subrequest = {
        "scope": {
            "company_id": "50",
            "preaggregation_filters": {
                'type': 'group',
                'logical_operator': 'AND',
                'clauses': [
                    {
                        'type': 'taxon_value',
                        'taxon': 'account_id',
                        'operator': '=',
                        'value': '595126134331606',
                    },
                ],
            },
        },
        "properties": {"data_sources": ["facebook_ads"]},
        "taxons": ["account_id", "spend", "cpm"],
    }
    comparison = ComparisonConfig(
        {'scope': ComparisonScopeType.company.value, 'taxons': ['objective']}
    ).to_native()

    self.request = BlendingDataRequest(
        {
            "data_subrequests": [subrequest],
            "comparison": comparison,
        }
    )
def _build_data_blend_query(
    cls,
    ctx: HuskyQueryContext,
    taxon_manager: BlendingTaxonManager,
    config_arg: BlendingDataRequest,
    query_info: BlendingQueryInfo,
) -> Dataframe:
    """
    Builds subquery for each subrequest and then blends them all into one dataframe.

    :param ctx: Husky query context
    :param taxon_manager: taxon manager carrying the TEL plan for this request
    :param config_arg: original request; cloned here because subrequests get mutated
    :param query_info: query info populated with one entry per subrequest
    :return: blended dataframe
    """
    # Clone the request, coz we will be modifying the subqueries below.
    request = BlendingDataRequest(config_arg.to_native())

    # Restriction on physical data sources applies to every subrequest alike.
    allowed_sources = set(request.physical_data_sources) if request.physical_data_sources else None

    subquery_dataframes = []
    for subrequest in request.data_subrequests:
        # add comparison taxons to data subrequest taxons
        subrequest.taxons = taxon_manager.get_subrequest_taxons(subrequest)
        sub_query_info = QueryInfo({'used_raw_taxons': subrequest.taxons})
        query_info.subrequests_info.append(sub_query_info)

        # Build query for subrequest and add it to the list
        data_source = subrequest.properties.data_source
        subquery_dataframes.append(
            MainQueryBuilder.build_query(
                ctx,
                subrequest.to_internal_model(),
                sub_query_info,
                taxon_manager.used_taxons,
                taxon_manager.plan.data_source_formula_templates[data_source],
                filter_templates=taxon_manager.plan.data_source_filter_templates[data_source],
                allowed_physical_data_sources=allowed_sources,
            )
        )

    return blend_dataframes(ctx, subquery_dataframes, taxon_manager.plan.data_source_formula_templates)
def compile_transformation_request(cls, req: TransformRequest, company_id: str) -> Tuple[str, HuskyQueryRuntime]:
    """
    Compiles Transform request to its SQL representation

    :param req: Input request
    :param company_id: Company ID
    :return: SQL and type of dialect
    """
    sorted_fields = sorted(req.requested_fields)

    # prepare origin description
    origin = DataRequestOrigin(
        {
            'system': 'FDQ',
            'extra': {'purpose': 'taxonomy.transform.compile'},
        }
    )

    # get all used taxons in the request, then the set of virtual data sources they cover
    used_taxons_map = fetch_all_used_taxons_map(company_id, sorted_fields)
    used_vds = {taxon.data_source for taxon in used_taxons_map.values() if taxon.data_source}

    # generate subrequest for each virtual data source;
    # this will allow Husky to push the taxons into relevant subrequests
    subrequests = [
        ApiDataRequest({'scope': {'company_id': company_id}, 'properties': {'data_sources': [vds]}})
        for vds in sorted(used_vds)
    ]

    # finalize the blending husky request
    husky_request = BlendingDataRequest(
        {'data_subrequests': subrequests, 'taxons': req.requested_fields, 'origin': origin}
    )

    # derive the query runtime from the active connection's dialect
    connection = Connection.get()
    query_runtime = EnumHelper.from_value_safe(HuskyQueryRuntime, Connection.get_dialect_name(connection))
    context = HuskyQueryContext(query_runtime)

    husky_dataframe = QueryBuilder.validate_data_request(context, husky_request)

    # add another layer of query to use correct names
    final_query = cls._correct_column_aliases(context, husky_dataframe)

    return compile_query(final_query, context.dialect), context.query_runtime
def preprocess_request(req: BlendingDataRequest):
    """
    Helper fn that is moving some values around, to be backward compatible.

    Mutates ``req`` in place.

    :param req: blending data request to normalize
    :raises InvalidRequest: when grouping sets are combined with fill_date_gaps
    """
    # Move order by from subrequests to top level
    for subrequest in req.data_subrequests:
        if subrequest.order_by:
            req.order_by.extend(subrequest.order_by)
            subrequest.order_by = []

    # Add taxons from grouping sets to top level, so they can be copied to all subrequests.
    # Otherwise computed dimensions would not be included in the result.
    grouping_sets_taxons = {item for sublist in (req.grouping_sets or []) for item in (sublist or [])}
    req.taxons = req.taxons or []
    # Sort before extending: iterating the set directly appends taxons in a
    # nondeterministic order (string hash randomization), which would make the
    # generated query unstable across runs for an identical request.
    req.taxons.extend(sorted(grouping_sets_taxons))

    move_top_level_to_subrequests(req.taxons, req.data_subrequests)

    if req.grouping_sets and req.fill_date_gaps:
        raise InvalidRequest('request.fill_date_gaps', 'fill_date_gaps is not supported when used with grouping sets.')
def setUp(self) -> None:
    """Build a two-source (twitter + facebook_ads) blending request and its query info."""
    super().setUp()
    self._twitter_acc_id = 'acc_id_tw123'
    self._fb_acc_id = 'acc_id_456'

    def _scoped_subrequest(data_source, account_id):
        # Every subrequest pins its data source to a single account id.
        return {
            "scope": {
                "preaggregation_filters": {
                    "type": "taxon_value",
                    "taxon": "account_id",
                    "value": account_id,
                    "operator": "=",
                }
            },
            "properties": {"data_sources": [data_source]},
        }

    self._blending_request = BlendingDataRequest(
        {
            "data_subrequests": [
                _scoped_subrequest("twitter", self._twitter_acc_id),
                _scoped_subrequest("facebook_ads", self._fb_acc_id),
            ],
            "taxons": ["fb_tw_merged_objective", "generic_cpm"],
            "limit": 100,
        }
    )
    self._info = BlendingQueryInfo.create(self._blending_request, SNOWFLAKE_HUSKY_CONTEXT)
def build_query(
    cls, ctx: HuskyQueryContext, req: BlendingDataRequest, query_info: Optional[BlendingQueryInfo] = None
) -> Dataframe:
    """
    Builds blended query

    Adding suggested comparison taxons (if desired, but missing)
    - attempt to use provided rules and generate the query using all taxons from the matched rule
    - if it fails, fall back to using only taxon Data Source as comparison taxon

    :param ctx: Husky query context
    :param req: Original request from API
    :param query_info: Optional query info structure
    :return: Generated blended data frame
    """
    query_info = query_info or BlendingQueryInfo.create(req, ctx)

    # Capture the request exactly as the client sent it, before anything mutates it.
    query_info.original_request_str = json.dumps(req.to_primitive())

    return cls._build_query(ctx, req, query_info)
def _build_comparison_blend_query(
    cls,
    ctx: HuskyQueryContext,
    config_arg: BlendingDataRequest,
    taxon_manager: BlendingTaxonManager,
    query_info: BlendingQueryInfo,
    allowed_physical_data_sources: Optional[Set[str]] = None,
) -> Optional[Dataframe]:
    """
    Builds comparison query for each subrequest and then blends them all into one comparison dataframe.

    :param ctx: Husky query context
    :param config_arg: original request; cloned here because subrequests get mutated
    :param taxon_manager: taxon manager carrying the TEL plan and taxon map
    :param query_info: query info; comparison subrequest infos are appended to it
    :param allowed_physical_data_sources: optional restriction passed through to each subquery
    :return: blended comparison dataframe, or None when no subrequest produced comparison taxons
    """
    dataframes = []
    config = BlendingDataRequest(config_arg.to_native())  # Clone, coz we will be modifying subqueries
    assert config.comparison, 'Comparison must be defined when trying to build comparison query..'
    comparison: ComparisonConfig = config.comparison
    for _subrequest in config.data_subrequests:
        # Derive a comparison-specific subrequest from the original one.
        subrequest = cls._build_comparison_subrequest(_subrequest, comparison, taxon_manager)
        data_source = subrequest.properties.data_source

        # if no comparison taxons were found for this subrequest, skip creating comparison query for it as well
        if len(subrequest.taxons) == 0:
            continue

        bm_sub_query_info = QueryInfo.create(subrequest)
        query_info.comparison_subrequests_info.append(bm_sub_query_info)
        # Build comparison dataframe and add it to a list.
        # TODO pass down TelPlan for comparisons
        # ComparisonRequestBuilder might have added filters (typically for company id project id)
        # Me create new filter templates for this comparison subrequest.
        filter_templates = TelPlanner.get_preaggregation_filter_templates(
            ctx,
            [subrequest.preaggregation_filters, subrequest.scope.preaggregation_filters],
            taxon_manager.taxon_map,
            data_source,
        )

        dataframes.append(
            QueryBuilder.build_query(
                ctx,
                subrequest,
                bm_sub_query_info,
                taxon_manager.used_taxons,
                dimension_templates=taxon_manager.plan.comparison_data_source_formula_templates[data_source],
                filter_templates=filter_templates,
                allowed_physical_data_sources=allowed_physical_data_sources,
            )
        )

    # if no comparison subrequests were created, there is no need to blend data frames
    if len(dataframes) == 0:
        return None

    # Blend all comparison dataframes into one
    # TODO pass down TelPlan for comparisons
    data_source_formula_templates = taxon_manager.plan.comparison_data_source_formula_templates
    dataframe = blend_dataframes(ctx, dataframes, data_source_formula_templates)

    # Prefix all comparison metric columns with 'comparison@' and create comparison taxon for it.
    query = dataframe.query
    final_columns = []
    aliased_taxon_by_slug: Dict[TaxonExpressionStr, DataframeColumn] = dict()
    for slug, df_column in dataframe.slug_to_column.items():
        # Alias metrics with comparison@ prefix, and select dimensions..
        if df_column.taxon.is_dimension:
            # Dimensions keep their slug; only the taxon object is deep-copied.
            new_taxon = df_column.taxon.copy(deep=True)
            new_slug = TaxonExpressionStr(f'{slug}')
        else:
            # Metrics get a new slug/taxon pair with the comparison prefix.
            new_slug, new_taxon = BlendingTaxonManager.create_comparison_taxon(df_column.taxon)

        final_columns.append(query.c[safe_identifier(slug)].label(new_taxon.slug_safe_sql_identifier))
        aliased_taxon_by_slug[new_slug] = DataframeColumn(new_slug, new_taxon, df_column.quantity_type)
    for pre_formulas in data_source_formula_templates.values():
        # and also select the dim columns from dim templates.
        for pre_formula in pre_formulas:
            final_columns.append(literal_column(quote_identifier(pre_formula.label, ctx.dialect)))
    # Wrap the blended query so columns come out under their (possibly prefixed) aliases.
    renamed_cols_query = select(sort_columns(final_columns)).select_from(dataframe.query)
    return Dataframe(
        renamed_cols_query, aliased_taxon_by_slug, dataframe.used_model_names, dataframe.used_physical_data_sources
    )