Exemplo n.º 1
0
 def test_blending_2_2(self):
     """Blend two dataframes sharing some columns and check the SQL and resulting slugs."""
     first_column_names = ('ad_id', 'impressions',
                           HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME)
     first_select = Select(
         columns=[column(name) for name in first_column_names],
         from_obj=table('table1'),
     )
     first_df = Dataframe(
         first_select,
         get_mocked_dataframe_columns_map(['ad_id', 'impressions']),
         set(),
         {'SF'},
     )

     second_column_names = ('ad_id', 'campaign_id', 'impressions',
                            HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME)
     second_select = Select(
         columns=[column(name) for name in second_column_names],
         from_obj=table('table2'),
     )
     second_df = Dataframe(
         second_select,
         get_mocked_dataframe_columns_map(
             ['ad_id', 'impressions', 'campaign_id']),
         set(),
         {'SF'},
     )

     blended = blend_dataframes(SNOWFLAKE_HUSKY_CONTEXT,
                                [first_df, second_df])

     # Compile once; the same string is written out and compared.
     compiled_sql = compile_query(blended.query)
     self.write_test_expectations('query.sql', compiled_sql)
     self.assertEqual(self.read_test_expectations('query.sql'),
                      compiled_sql)

     # The blended dataframe exposes the union of slugs from both inputs.
     self.assertEqual({'ad_id', 'impressions', 'campaign_id'},
                      set(blended.slug_to_column))
Exemplo n.º 2
0
    def test_namespaced_taxons_build_query(self):
        """Build a select query mixing plain and data-source-namespaced taxon slugs."""
        taxon_slugs = [
            'impressions',
            'ad_id',
            f'{MOCK_DATA_SOURCE_NAME}|dimension',
            f'{MOCK_DATA_SOURCE_NAME}|metric',
        ]
        builder = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self.scope,
            get_specific_select_mocked_taxons(taxon_slugs),
            get_specific_select_mocked_taxons(taxon_slugs),
            self.graph,
            'data-source',
        )
        result, _, effectively_used_models = builder.get_query()

        compiled_sql = compile_query(result)
        self.write_test_expectations('query.sql', compiled_sql)
        self.assertEqual(self.read_test_expectations('query.sql'),
                         compiled_sql)

        expected_models = {
            'noncached_models': [
                'mock_data_source.metric_model',
                'mock_data_source.entity_model',
            ],
        }
        self.assertDictEqual(expected_models,
                             effectively_used_models.to_primitive())
Exemplo n.º 3
0
    def _run_basic_test(self, projection_taxons, selected_taxons=None):
        """
        Build select + projection queries for the given taxon slugs and compare the compiled SQL
        with the stored 'query.sql' expectation.

        :param projection_taxons: Slugs passed as projection taxons to both builders
        :param selected_taxons: Slugs passed as select taxons; falls back to ``projection_taxons`` when None
        """
        effective_selection = (projection_taxons
                               if selected_taxons is None else selected_taxons)

        select_builder = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self._scope,
            get_specific_select_mocked_taxons(effective_selection),
            get_specific_select_mocked_taxons(projection_taxons),
            self._graph,
            'data-source',
        )
        query, taxon_model_info_map, _ = select_builder.get_query()

        projected = ProjectionBuilder.query(
            query,
            taxon_model_info_map,
            get_specific_select_mocked_taxons(projection_taxons),
            'data-source',
            None,
            None,
            None,
        )

        compiled_sql = compile_query(projected.query)
        self.write_test_expectations('query.sql', compiled_sql)
        expected = self.read_test_expectations('query.sql')

        assert compiled_sql == expected
Exemplo n.º 4
0
 def item_to_primitive(self, item):
     """
     Convert ``item`` to a plain structure.

     ExprResult, PostFormula and TelExpressionException instances are handled here;
     any other item is passed to the parent class implementation.
     """
     if isinstance(item, ExprResult):
         fields = {
             'data_source_formula_templates': item.data_source_formula_templates,
             'dimension_formulas': item.dimension_formulas,
             'pre_formulas': item.pre_formulas,
             'post_formula': item.post_formula,
             'phase': item.phase,
             'override_mappings': sorted(item.override_mappings),
             'invalid_value': item.invalid_value,
         }
         # Each field value is converted recursively.
         return {name: self.item_to_primitive(value) for name, value in fields.items()}

     if isinstance(item, PostFormula):
         sql = compile_query(item._sql)
         primitive = {'sql': sql}
         if item.template is not None:
             template = compile_query(item.template)
             # The template is only emitted when it differs from the plain SQL.
             if template != sql:
                 primitive['template'] = template
         if len(item.exclude_slugs) > 0:
             primitive['exclude_slugs'] = sorted(self.item_to_primitive(slug) for slug in item.exclude_slugs)
         return primitive

     if isinstance(item, TelExpressionException):
         return {'exception_class': str(item.__class__), 'api_response_message': str(item)}

     return super().item_to_primitive(item)
Exemplo n.º 5
0
    def test_query_with_pre_filter_without_filter_taxons(self):
        """Filter on 'ad_name' while projecting only 'impressions' and 'ad_id'."""
        filter_spec = {
            'type': FilterClauseType.TAXON_VALUE.value,
            'taxon': 'ad_name',
            'operator': SimpleFilterOperator.LIKE.value,
            'value': '%abc%',
        }
        filter_clause = TaxonValueFilterClause(filter_spec)

        # The filtered taxon is selected from the graph but is not part of the projection.
        builder = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self.scope,
            get_specific_select_mocked_taxons(
                ['impressions', 'ad_id', filter_clause.taxon]),
            get_specific_select_mocked_taxons(['impressions', 'ad_id']),
            self.graph,
            'data-source',
            filter_clause=filter_clause,
        )
        result, _, _ = builder.get_query()

        compiled_sql = compile_query(result)
        self.write_test_expectations('query.sql', compiled_sql)
        expected = self.read_test_expectations('query.sql')

        self.assertEqual(expected, compiled_sql.strip())
Exemplo n.º 6
0
    def test_query_with_pre_taxon_taxon_pre_filter(self):
        """Filter comparing two taxons ('spend' = 'impressions') while projecting only 'ad_id'."""
        # NOTE(review): the clause class is TaxonTaxonFilterClause but 'type' is TAXON_VALUE;
        # confirm this is intended.
        filter_spec = {
            'type': FilterClauseType.TAXON_VALUE.value,
            'taxon': 'spend',
            'right_taxon': 'impressions',
            'operator': SimpleFilterOperator.EQ.value,
        }
        filter_clause = TaxonTaxonFilterClause(filter_spec)

        # Taxons referenced by the filter are selected in addition to the projected one.
        selected_slugs = ['ad_id', *filter_clause.get_taxon_slugs()]

        builder = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self.scope,
            get_specific_select_mocked_taxons(selected_slugs),
            get_specific_select_mocked_taxons(['ad_id']),
            self.graph,
            'data-source',
            filter_clause=filter_clause,
        )
        result, _, _ = builder.get_query()

        compiled_sql = compile_query(result)
        self.write_test_expectations('query.sql', compiled_sql)
        expected = self.read_test_expectations('query.sql')

        self.assertEqual(expected, compiled_sql.strip())
Exemplo n.º 7
0
    def test_basic_build_join_query(self):
        """Build a select query for five taxons and compare it with the stored expectation."""
        slugs = ['spend', 'impressions', 'ad_id', 'ad_name', 'week_of_year']
        taxons = get_specific_select_mocked_taxons(slugs)

        builder = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self.scope,
            taxons,
            taxons,
            self.graph,
            'data-source',
        )
        result, _, _ = builder.get_query()

        compiled_sql = compile_query(result)
        self.write_test_expectations('query.sql', compiled_sql)
        self.assertEqual(self.read_test_expectations('query.sql'),
                         compiled_sql)
Exemplo n.º 8
0
def test_render_visitor(inp, expectation):
    """Render ``inp`` with a ModelTelDialect built from ``_TMP_MODEL`` and compare the compiled SQL."""
    ctx = SNOWFLAKE_HUSKY_CONTEXT
    tel_dialect = ModelTelDialect(
        unique_object_name=_TMP_MODEL.unique_object_name(ctx),
        virtual_data_source=_TMP_MODEL.data_sources[0],
        model=_TMP_MODEL,
    )
    rendered = tel_dialect.render(inp, ctx, {})
    compiled_sql = compile_query(rendered.sql(ctx.dialect))
    assert compiled_sql == expectation
Exemplo n.º 9
0
    def _build_taxon_model_info_map(
        self,
        taxons: Dict[TaxonSlugExpression, Taxon],
        taxon_to_model: Dict[TaxonSlugExpression, HuskyModel],
    ):
        """
        Build the slug -> TaxonModelInfo map and store it in ``self.taxon_model_info_map``.

        For every taxon, the info holds the compiled column accessor, the model name and the
        quantity type of the matching model attribute. Filter templates whose slug is not
        already in the map get an entry with the rendered template as accessor and no model
        name or quantity type.

        :param taxons: Taxons keyed by slug expression (only the keys are used)
        :param taxon_to_model: Model to read each taxon from, keyed by slug expression
        """
        info_by_slug = {}

        for slug_expression in taxons:
            model = taxon_to_model[slug_expression]
            column_accessor = self._get_column_accessor_for_taxon_and_model(
                model, slug_expression)
            accessor_sql = compile_query(column_accessor, self.ctx.dialect)
            model_name = model.name
            quantity_type = model.get_attribute_by_taxon(
                slug_expression.slug).quantity_type

            info_by_slug[slug_expression.slug] = TaxonModelInfo(
                accessor_sql, model_name, quantity_type)

        for filter_slug, template in self.filter_templates.items():
            if filter_slug in info_by_slug:
                # Already known as a raw slug, so no accessor needs to be rendered for it.
                continue

            render_params = {
                used_slug: info_by_slug[used_slug].taxon_sql_accessor
                for used_slug in template.used_taxons
            }
            rendered_accessor = template.render_formula(**render_params)
            info_by_slug[filter_slug] = TaxonModelInfo(
                rendered_accessor, None, None)

        self.taxon_model_info_map = info_by_slug
Exemplo n.º 10
0
def assert_result_formulas(actual_result: ExprResult,
                           expected_result: ExprResult):
    """
    Assert that two expression results agree, field by field.

    Formula lists are compared via ``repr`` of their items and the SQL is compared after
    compiling both sides with ``_DIALECT``. The first mismatch raises AssertionError with a
    message naming the field.
    """
    actual_templates = actual_result.data_source_formula_templates
    expected_templates = expected_result.data_source_formula_templates
    assert actual_templates == expected_templates, 'data_source_formulas dont match'

    actual_dimension = [repr(f) for f in actual_result.dimension_formulas]
    expected_dimension = [repr(f) for f in expected_result.dimension_formulas]
    assert actual_dimension == expected_dimension, 'dimension_formulas dont match'

    actual_pre = [repr(f) for f in actual_result.pre_formulas]
    expected_pre = [repr(f) for f in expected_result.pre_formulas]
    assert actual_pre == expected_pre, 'pre_formulas dont match'

    actual_sql = compile_query(actual_result.sql(_DIALECT), _DIALECT)
    expected_sql = compile_query(expected_result.sql(_DIALECT), _DIALECT)
    assert actual_sql == expected_sql, 'sql dont match'

    assert actual_result.phase == expected_result.phase, 'phase dont match'
    assert actual_result.override_mappings == expected_result.override_mappings, 'override mappings dont match'
    assert actual_result.invalid_value == expected_result.invalid_value, "invalid_value doesn't match"
Exemplo n.º 11
0
    def result(self, context: TelRootContext) -> TelQueryResult:
        """
        Return the query result of the wrapped value, moved to this node's phase.

        When the wrapped value is already in ``self._phase`` its result is returned unchanged.
        Values in the ``dimension_data_source`` or ``any`` phase produce a new TelQueryResult;
        if taxons are used, the value's SQL is registered as a data source formula template and
        the returned SQL refers to it by label.

        :param context: TEL root context
        :raises RuntimeError: when the wrapped value is in any other phase
        """
        value_result = self._value.result(context)
        value_phase = self._value.phase(context)

        if value_phase == self._phase:
            return value_result

        if value_phase not in (TelPhase.dimension_data_source, TelPhase.any):
            raise RuntimeError(f'Cannot move to {self._phase} phase from {value_phase}')

        new_templates = []

        if self.used_taxons(context).has_some():
            # The label is created once and reused on later calls.
            if not self._cached_label:
                self._cached_label = context.new_label

            assert 1 == len(self._value.return_data_sources(context))

            data_source = cast(
                str, next(ds for ds in list(self._value.return_data_sources(context)) if ds is not None)
            )

            # The returned SQL is just the quoted label; the real expression lives in the template.
            sql = literal_column(safe_quote_identifier(self._cached_label, context.husky_dialect))
            formula_template = SqlFormulaTemplate(
                SqlTemplate(compile_query(value_result.sql, context.husky_dialect)),
                cast(str, self._cached_label),
                data_source,
                cast(Set[str], self._value.template_slugs(context)),
            )
            new_templates.append(formula_template)
            template = sql
        else:
            sql = value_result.sql
            template = value_result.template

        label = self._cached_label or value_result.label

        if self._value.invalid_value(context):
            # Invalid values carry only the SQL, the new templates and the label.
            return TelQueryResult(
                sql,
                dialect=context.husky_dialect,
                data_source_formula_templates=new_templates,
                label=label,
            )

        return TelQueryResult(
            sql=sql,
            dialect=context.husky_dialect,
            aggregations=value_result.aggregations,
            dimension_formulas=value_result.dimension_formulas,
            data_source_formula_templates=new_templates + value_result.data_source_formula_templates,
            label=label,
            exclude_slugs=value_result.exclude_slugs,
            template=template,
        )
Exemplo n.º 12
0
    def test_basic_build_query_with_pre_filter(self):
        """Project 'impressions' and 'spend' with a LIKE pre-filter on 'ad_name'."""
        projected_taxons = ['impressions', 'spend']

        pre_filter = TaxonValueFilterClause({
            'type': FilterClauseType.TAXON_VALUE.value,
            'taxon': 'ad_name',
            # Use the enum's raw value, matching how 'type' is passed above.
            'operator': SimpleFilterOperator.LIKE.value,
            'value': 'zombies!',
        })

        # The filtered taxon has to be selected even though it is not projected.
        selected_taxons = projected_taxons + ['ad_name']

        query, taxon_model_info_map, _ = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self._scope,
            get_specific_select_mocked_taxons(selected_taxons),
            get_specific_select_mocked_taxons(projected_taxons),
            self._graph,
            'data-source',
            pre_filter,
        ).get_query()

        final_dataframe = ProjectionBuilder.query(
            query,
            taxon_model_info_map,
            get_specific_select_mocked_taxons(projected_taxons),
            'data-source',
            None,
            None,
            None,
            {'context'},
        )

        # Compile once so the written expectation and the asserted value are the same string.
        compiled_sql = compile_query(final_dataframe.query)
        self.write_test_expectations('query.sql', compiled_sql)
        expected = self.read_test_expectations('query.sql')

        self.assertEqual(expected, compiled_sql)
Exemplo n.º 13
0
    def test_basic_build_join_query(self):
        """Build a select query including 'gender' and check the SQL and the models used."""
        slugs = ['spend', 'gender', 'impressions', 'ad_id', 'ad_name']
        taxons = get_specific_select_mocked_taxons(slugs)

        builder = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self.scope,
            taxons,
            taxons,
            self.graph,
            'data-source',
        )
        result, _, effectively_used_models = builder.get_query()

        compiled_sql = compile_query(result)
        self.write_test_expectations('query.sql', compiled_sql)
        self.assertEqual(self.read_test_expectations('query.sql'),
                         compiled_sql)

        expected_models = {
            'noncached_models': [
                'mock_data_source.metric_gender_model',
                'mock_data_source.entity_model',
            ],
        }
        self.assertDictEqual(expected_models,
                             effectively_used_models.to_primitive())
Exemplo n.º 14
0
    def compile_transformation_request(cls, req: TransformRequest, company_id: str) -> Tuple[str, HuskyQueryRuntime]:
        """
        Compile a transform request into SQL.

        A blending request is assembled with one subrequest per virtual data source used by the
        requested taxons, validated into a dataframe, and its column aliases are corrected
        before compiling.

        :param req: Input request
        :param company_id: Company ID

        :return: Compiled SQL and the query runtime of the context it was compiled for
        """
        sorted_fields = sorted(req.requested_fields)

        # Describes where the request comes from.
        origin = DataRequestOrigin({
            'system': 'FDQ',
            'extra': {'purpose': 'taxonomy.transform.compile'},
        })

        used_taxons_map = fetch_all_used_taxons_map(company_id, sorted_fields)

        # Taxons without a data source contribute no virtual data source.
        used_vds = {taxon.data_source for taxon in used_taxons_map.values() if taxon.data_source}

        # One subrequest per virtual data source, in sorted order.
        subrequests = [
            ApiDataRequest({'scope': {'company_id': company_id}, 'properties': {'data_sources': [vds]}})
            for vds in sorted(used_vds)
        ]

        husky_request = BlendingDataRequest(
            {'data_subrequests': subrequests, 'taxons': req.requested_fields, 'origin': origin}
        )

        # The query runtime is derived from the dialect name of the current connection.
        connection = Connection.get()
        runtime_name = Connection.get_dialect_name(connection)
        runtime = EnumHelper.from_value_safe(HuskyQueryRuntime, runtime_name)
        context = HuskyQueryContext(runtime)

        husky_dataframe = QueryBuilder.validate_data_request(context, husky_request)

        # Wrap in another query layer so the output columns carry the correct names.
        final_query = cls._correct_column_aliases(context, husky_dataframe)
        compiled_sql = compile_query(final_query, context.dialect)

        return compiled_sql, context.query_runtime
Exemplo n.º 15
0
    def test_basic_build_query_with_order_by(self):
        """Project two taxons with a two-column ordering and check the compiled SQL."""
        selected_taxons = ['impressions', 'ad_id']

        builder = SelectBuilder(
            SNOWFLAKE_HUSKY_CONTEXT,
            self._scope,
            get_specific_select_mocked_taxons(selected_taxons),
            get_specific_select_mocked_taxons(selected_taxons),
            self._graph,
            'data-source',
        )
        query, taxon_model_info_map, _ = builder.get_query()

        # 'impressions' descending first, then 'ad_id' ascending.
        ordering = (
            ('impressions', TaxonOrderType.desc.value),
            ('ad_id', TaxonOrderType.asc.value),
        )
        order_by = [
            TaxonDataOrder({'taxon': slug, 'type': order_type})
            for slug, order_type in ordering
        ]

        projected = ProjectionBuilder.query(
            query,
            taxon_model_info_map,
            get_specific_select_mocked_taxons(selected_taxons),
            'data-source',
            order_by,
            1,
            2,
            {'context'},
        )

        compiled_sql = compile_query(projected.query)
        self.write_test_expectations('query.sql', compiled_sql)
        self.assertEqual(self.read_test_expectations('query.sql'),
                         compiled_sql)
Exemplo n.º 16
0
    def test_scope_filters(self, mock__get_taxons, mock__load_models):
        """Build a query whose scope carries a grouped preaggregation filter on 'account_id'."""
        mock__load_models.return_value = [
            get_mock_entity_model(),
            get_mock_metric_model(),
        ]

        account_clause = {
            "type": "taxon_value",
            "taxon": "account_id",
            "operator": "=",
            "value": "595126134331606"
        }
        scope_filters = {
            "type": "group",
            "logical_operator": "AND",
            "clauses": [account_clause],
            "negate": False,
        }
        request = InternalDataRequest({
            'scope': {
                'project_id': 'project',
                'company_id': 'company',
                "preaggregation_filters": scope_filters,
            },
            'properties': {
                'data_sources': ['mock_data_source']
            },
            'taxons': ['account_id', 'ad_name'],
        })

        dataframe = QueryBuilder.build_query(
            SNOWFLAKE_HUSKY_CONTEXT,
            request,
            QueryInfo.create(request),
            preloaded_taxons=TAXON_MAP,
        )

        compiled_sql = compile_query(dataframe.query)
        self.write_test_expectations('query.sql', compiled_sql)
        expected = self.read_test_expectations('query.sql')
        assert expected == compiled_sql

        # Only the entity model is expected to be used for these taxons.
        self.assertEqual({'mock_data_source.entity_model'},
                         dataframe.used_model_names)
Exemplo n.º 17
0
    def taxon_sql_accessor(
        self,
        ctx: HuskyQueryContext,
        taxon_slug: str,
        cast_array: bool = False,
        model_tel_dialect: Optional[ModelTelDialect] = None,
    ) -> str:
        """
        Return the compiled SQL accessor for the given taxon on this model.

        The accessor is obtained by rendering the attribute's TEL transformation and compiling
        the resulting SQL with the dialect of ``ctx``.

        :param ctx:                 Husky query context
        :param taxon_slug:          Slug of the taxon whose attribute is looked up on the model
        :param cast_array:          When True, accessors of array attributes are wrapped in CAST(... AS VARCHAR)
        :param model_tel_dialect:   Model TEL dialect to render with; a new one is created for this model when omitted
        :return:                    SQL accessor as a string
        """
        attribute = self.get_attribute_by_taxon(taxon_slug)

        if model_tel_dialect is None:
            # Nothing was supplied by the caller, so fall back to a dialect built for this model.
            renderer = ModelTelDialect(
                unique_object_name=self.unique_object_name(ctx),
                virtual_data_source=self.data_sources[0],
                model=self,
            )
        else:
            renderer = model_tel_dialect

        rendered = renderer.render(attribute.tel_transformation, ctx, {})
        accessor_sql = compile_query(rendered.sql(ctx.dialect), ctx.dialect)

        needs_cast = cast_array and attribute.quantity_type is ValueQuantityType.array
        if needs_cast:
            return f'CAST({accessor_sql} AS VARCHAR)'

        return accessor_sql
Exemplo n.º 18
0
    def render_formula(
        self,
        dialect: default.DefaultDialect,
        dimension_slugs: Optional[Set[TaxonExpressionStr]] = None
    ) -> ClauseElement:
        """
        Render the final SQL formula.

        The template is compiled with ``dialect`` and DIMENSION_SLUGS_TEMPLATE_PARAM is replaced
        with the sorted, comma separated dimension slugs that are not listed in ``_exclude_slugs``.
        If there is no template, or no dimension slug remains after the exclusion, ``_sql`` is
        returned unchanged.

        :param dialect: Dialect used to compile the template
        :param dimension_slugs: Candidate dimension slugs; None is treated as an empty set
        """
        candidates = dimension_slugs or set()
        remaining_slugs = candidates.difference(self._exclude_slugs)

        if self._template is None or not remaining_slugs:
            return self._sql

        compiled_template = SqlTemplate(compile_query(self._template, dialect))
        joined_slugs = ', '.join(sorted(remaining_slugs))
        rendered = compiled_template.substitute(
            {self.DIMENSION_SLUGS_TEMPLATE_PARAM: joined_slugs})
        return literal_column(rendered)
Exemplo n.º 19
0
 def test_simple_concat(self, mock__get_taxons, mock__load_models):
     """Build a query with a concat dimension template on top of 'ad_name'."""
     mock__load_models.return_value = [
         get_mock_entity_model(),
         get_mock_metric_model(),
     ]

     scope_filter = {
         "type": "taxon_value",
         "taxon": "account_id",
         "operator": "=",
         "value": "abc",
     }
     request = InternalDataRequest({
         'scope': {
             'project_id': 'project-id',
             'company_id': 'company-id',
             "preaggregation_filters": scope_filter,
         },
         'properties': {
             'data_sources': ['mock_data_source']
         },
         'taxons': ['account_id', 'ad_name'],
     })

     concat_template = SqlFormulaTemplate(
         SqlTemplate("concat(${ad_name},'xx')"),
         '__1',
         MOCK_DATA_SOURCE_NAME,
         {'ad_name'},
     )

     df = QueryBuilder.build_query(
         SNOWFLAKE_HUSKY_CONTEXT,
         request,
         QueryInfo.create(request),
         preloaded_taxons=TAXON_MAP,
         dimension_templates=[concat_template],
     )

     compiled_sql = compile_query(df.query)
     self.write_test_expectations('query.sql', compiled_sql)
     expected = self.read_test_expectations('query.sql')
     assert expected == compiled_sql

     # Only the entity model is expected to be used for these taxons.
     self.assertEqual({'mock_data_source.entity_model'},
                      df.used_model_names)
Exemplo n.º 20
0
 def test_scope_filters(self):
     """Check that the scope's preaggregation filter is added to the query as a WHERE clause."""
     filter_spec = {
         'type': FilterClauseType.TAXON_VALUE.value,
         'taxon': 'account_id',
         'operator': SimpleFilterOperator.EQ.value,
         'value': '10',
     }
     scope_filter = TaxonValueFilterClause(filter_spec).to_native()
     scope = Scope({
         'company_id': '10',
         'project_id': '10',
         'preaggregation_filters': scope_filter,
     })

     model = get_mock_metric_model()
     query = select([literal_column('test')])
     model_info = TaxonModelInfo('acc_id_column', model.name,
                                 ValueQuantityType.scalar)

     filtered_query = ScopeGuard.add_scope_row_filters(
         SNOWFLAKE_HUSKY_CONTEXT, scope, query, {'account_id': model_info})

     # The filter is expected on the accessor column registered for 'account_id'.
     compiled_sql = compile_query(filtered_query)
     assert compiled_sql == "SELECT test \nWHERE acc_id_column = '10'"
Exemplo n.º 21
0
    def build_query(
        ctx: HuskyQueryContext,
        subrequest: InternalDataRequest,
        query_info: QueryInfo,
        preloaded_taxons: TaxonMap,
        dimension_templates: Optional[List[SqlFormulaTemplate]] = None,
        filter_templates: Optional[TaxonToTemplate] = None,
    ) -> Dataframe:
        """
        Build the select and projection queries for a single-data-source subrequest.

        :param ctx: Husky query context; its dialect is used when logging the compiled queries
        :param subrequest: Request to build the query for; must reference exactly one data source
        :param query_info: Receives the query definition with the effectively used models
        :param preloaded_taxons: Taxons handed to the taxon manager
        :param dimension_templates: Sql column templates to select
        :param filter_templates: Filter templates keyed by taxon slug, referenced from scope or preagg filters.
        :return: Dataframe produced by the projection builder
        :raises MultipleDataSources: when the subrequest does not reference exactly one data source
        """
        if not dimension_templates:
            dimension_templates = []
        if not filter_templates:
            filter_templates = dict()

        # Fetch taxons
        taxon_manager = SimpleTaxonManager.initialize(
            subrequest, dimension_templates, filter_templates, preloaded_taxons
        )

        requested_sources = subrequest.properties.data_sources
        data_sources = set(requested_sources)
        if len(requested_sources) != 1:
            # Joining across data sources is more complex and not implemented yet.
            raise MultipleDataSources(data_sources)
        data_source = subrequest.properties.data_sources[0]

        models = ModelRetriever.load_models(data_sources, subrequest.scope, subrequest.properties.model_name)

        # Build the graph of models
        graph = GraphBuilder.create_with_models(models)

        # Create the select query
        select_builder = SelectBuilder(
            ctx,
            subrequest.scope,
            taxon_manager.graph_select_taxons,
            taxon_manager.projection_taxons,
            graph,
            data_source,
            subrequest.preaggregation_filters,
            dimension_templates,
            filter_templates,
        )
        select_query, taxon_model_info_map, effectively_used_models = select_builder.get_query()

        query_info.definition = QueryDefinition({'effectively_used_models': effectively_used_models})

        logger.debug('Select Query: %s', compile_query(select_query, ctx.dialect))

        # Create the projection query
        # NOTE(review): this passes properties.data_source rather than the local data_source
        # taken from data_sources[0]; confirm the two are equivalent.
        projected = ProjectionBuilder.query(
            select_query,
            taxon_model_info_map,
            taxon_manager.projection_taxons,
            subrequest.properties.data_source,
            subrequest.order_by,
            subrequest.limit,
            subrequest.offset,
            dimension_templates,
        )

        logger.debug('Projection Query: %s', compile_query(projected.query, ctx.dialect))
        return projected