示例#1
0
class ModelAttribute(SchematicsModel):
    """
    Schematics model of a single model attribute: its taxon, identifier flag,
    TEL transformation and quantity type.
    """

    taxon: str = NonEmptyStringType(required=True)
    identifier: bool = BooleanType(default=False)

    tel_transformation: str = NonEmptyStringType(required=True)
    """
    Model attribute transformation written in TEL.
    """

    quantity_type: ValueQuantityType = EnumType(
        ValueQuantityType,
        default=ValueQuantityType.scalar,
    )

    @memoized_property
    def taxon_memoized(self):
        """
        Memoized accessor for ``taxon``; the value is kept on the instance once it has been read.
        :return: value of ``taxon``
        """
        taxon_value = self.taxon
        return taxon_value

    @memoized_property
    def identifier_memoized(self):
        """
        Memoized accessor for ``identifier``; the value is kept on the instance once it has been read.
        :return: value of ``identifier``
        """
        identifier_flag = self.identifier
        return identifier_flag

    def __repr__(self):
        """Delegate the textual representation to ``serialize_class_with_props``."""
        representation = serialize_class_with_props(self)
        return representation

    def __hash__(self):
        """Hash the attribute by the string form of its (memoized) taxon."""
        taxon_text = str(self.taxon_memoized)
        return hash(taxon_text)
class TaxonTaxonFilterClause(FilterClause):
    """Filter clause comparing two columns (taxons) with a simple operator"""

    taxon: str = StringType(required=True, min_length=1)
    """Left-side column name"""

    right_taxon: str = StringType(required=True, min_length=1)
    """Right-side column name"""

    operator: SimpleFilterOperator = EnumType(SimpleFilterOperator, required=True)
    """Operator in the clause"""

    def generate(self, ctx, query, taxon_model_info_map: Dict[str, TaxonModelInfo]) -> ClauseElement:
        """
        Build the SQL expression for this clause by delegating to the shared
        simple-operator generator. ``query`` is accepted but not used here.
        """
        sql_clause = _generate_simple_operator_clause(ctx, self, taxon_model_info_map)
        return sql_clause

    def get_taxon_slugs(self) -> Set[str]:
        """Return the slugs of both taxons referenced by this clause."""
        slugs = {self.taxon}
        slugs.add(self.right_taxon)
        return slugs

    @staticmethod
    def _claim_polymorphic(data: Dict[str, Any]) -> Optional[Type['FilterClause']]:
        """Claim ``data`` for this class when its type resolves to ``FilterClauseType.TAXON_TAXON``."""
        claimed_class = FilterClause._detect_filter_clause_type(
            data,
            FilterClauseType.TAXON_TAXON,
            TaxonTaxonFilterClause,
        )
        return claimed_class
class TaxonValueFilterClause(FilterClause):
    """Filter clause comparing one column (taxon) against one value with a simple operator"""

    taxon: str = StringType(required=True, min_length=1)
    """Column name in the clause"""

    operator: SimpleFilterOperator = EnumType(SimpleFilterOperator, required=True)
    """Operator in the clause"""

    value: Optional[Union[str, float, bool]] = UnionNoConversionType((IntType, FloatType, StringType, BooleanType))
    """Comparison value in the clause"""

    def generate(self, ctx, query, taxon_model_info_map: Dict[str, TaxonModelInfo]) -> ClauseElement:
        """
        Build the SQL expression for this clause by delegating to the shared
        simple-operator generator. ``query`` is accepted but not used here.
        """
        sql_clause = _generate_simple_operator_clause(ctx, self, taxon_model_info_map)
        return sql_clause

    def get_taxon_slugs(self) -> Set[str]:
        """Return a set holding the single taxon slug used by this clause."""
        only_slug = self.taxon
        return {only_slug}

    @staticmethod
    def _claim_polymorphic(data: Dict[str, Any]) -> Optional[Type['FilterClause']]:
        """Claim ``data`` for this class when its type resolves to ``FilterClauseType.TAXON_VALUE``."""
        claimed_class = FilterClause._detect_filter_clause_type(
            data,
            FilterClauseType.TAXON_VALUE,
            TaxonValueFilterClause,
        )
        return claimed_class
示例#4
0
class Scope(schematics.Model):
    """Scope model carrying company and project ids, model visibility and pre-aggregation filters."""

    company_id: str = StringType(required=True)
    """
    Historically, company_id=None meant global. Now, we should use support company id, same as with taxons.
    """
    project_id: Optional[str] = StringType()

    model_visibility: ModelVisibility = EnumType(ModelVisibility, default=ModelVisibility.available)
    """
    Every production consumer should not set this field. Set to experimental if you want to test new changes / models.
    """

    preaggregation_filters = PolyModelType(get_all_filter_clauses())
    """Optional pre-aggregation filters which determine scope of this Husky request"""

    @property
    def all_filters(self):
        """
        Return the pre-aggregation filters currently set on this scope
        """
        scope_filters = self.preaggregation_filters
        return scope_filters

    def __hash__(self) -> int:
        """
        Hash built from project id, company id and model visibility (in that order),
        each converted with ``str`` and joined by underscores. Filters do not take part.
        """
        key_parts = (self.project_id, self.company_id, self.model_visibility)
        return hash('_'.join(map(str, key_parts)))
示例#5
0
class ApiScope(schematics.Model):
    """
    Schematics model describing a request scope: company, project, model visibility
    and pre-aggregation filters. None of the fields is marked as required here.
    """

    # Company identifier; declared without ``required=True``, so it may be omitted.
    company_id: str = StringType()
    # Optional project identifier.
    project_id: Optional[str] = StringType()

    # Falls back to ``ModelVisibility.available`` when not provided.
    model_visibility: ModelVisibility = EnumType(
        ModelVisibility, default=ModelVisibility.available)
    """
    Every production consumer should not set this field. Set to experimental if you want to test new changes / models.
    """

    # The accepted clause models are whatever ``get_all_filter_clauses()`` returns at class-definition time.
    preaggregation_filters = PolyModelType(get_all_filter_clauses())
    """Pre-aggregation filters which determine scope of this Husky request"""
class TaxonArrayFilterClause(FilterClause):
    """Filter clause for array-like SQL conditions (such as IN / NOT IN)"""

    taxon: str = StringType(required=True)
    """Column name in the clause"""

    operator: ArrayFilterOperator = EnumType(ArrayFilterOperator, required=True)
    """Specifies operator in the clause"""

    value: Iterable[Union[str, float]] = ListType(StringType(required=True, min_length=1), min_size=1)
    """Value in the array clause"""

    def generate(self, ctx, query, taxon_model_info_map: Dict[str, TaxonModelInfo]) -> ClauseElement:
        """
        Build the SQL expression for this clause.

        For array taxons the membership test is an overlap check between the taxon
        value and an array built from ``value``; otherwise it is a plain ``IN``.
        ``NOT_IN`` wraps the same membership test in ``not_``.

        :raises UnknownOperator: when the operator is neither IN nor NOT_IN
        """
        info = taxon_model_info_map[self.taxon]
        column = literal_column(info.taxon_sql_accessor)
        taxon_is_array = info.is_array

        # resolve the operator first; anything other than IN / NOT_IN is rejected
        if self.operator is ArrayFilterOperator.IN:
            negated = False
        elif self.operator is ArrayFilterOperator.NOT_IN:
            negated = True
        else:
            raise UnknownOperator(self)

        if taxon_is_array:
            # array taxon: check whether any of the values is contained in the taxon value (list)
            membership = func.arrays_overlap(func.array_construct(self.value), column)
        else:
            membership = column.in_(self.sql_value)

        return not_(membership) if negated else membership

    def get_taxon_slugs(self) -> Set[str]:
        """Return a set holding the single taxon slug used by this clause."""
        only_slug = self.taxon
        return {only_slug}

    @property
    def sql_value(self):
        """Values of the clause, each wrapped with ``literal``."""
        return list(map(literal, self.value))

    @staticmethod
    def _claim_polymorphic(data: Dict[str, Any]) -> Optional[Type['FilterClause']]:
        """Claim ``data`` for this class when its type resolves to ``FilterClauseType.TAXON_ARRAY``."""
        claimed_class = FilterClause._detect_filter_clause_type(
            data,
            FilterClauseType.TAXON_ARRAY,
            TaxonArrayFilterClause,
        )
        return claimed_class
class FilterClause(Model):
    """Base model for every filter clause, whether simple or nested

    NOTE: ``Model`` is already an abstract class, so this class does not have to declare that itself.
    """

    type: FilterClauseType = EnumType(FilterClauseType, required=True)
    """ Type of the filter clause """

    @abstractmethod
    def generate(self, ctx, query: Select, taxon_model_info_map: Dict[str, TaxonModelInfo]) -> ClauseElement:
        """ Builds the SQL Alchemy expression representing this filter clause"""
        raise NotImplementedError('Not implemented')

    @abstractmethod
    def get_taxon_slugs(self) -> Set[str]:
        """
        Returns every taxon slug used by this clause, including those of clauses nested within it
        """
        raise NotImplementedError('Not implemented')

    @staticmethod
    @abstractmethod
    def _claim_polymorphic(data: Dict[str, Any]) -> Optional[Type['FilterClause']]:
        """
        Hook used by PolyModelType during conversion to decide which model class should
        represent the incoming structure.

        Overriding .claim_polymorphic() would work too, but that method would have to know
        about all possible models, so this protected method is overridden instead.

        :param data: Data to be converted
        :return: Model class representing the data
        """
        raise NotImplementedError('Not implemented')

    @staticmethod
    def _detect_filter_clause_type(
        data: Dict[str, Any],
        expected_type: FilterClauseType,
        filter_clause: Type['FilterClause'],
    ) -> Optional[Type['FilterClause']]:
        """
        Return ``filter_clause`` when ``data['type']`` resolves to ``expected_type``.

        :param data: Data to be converted
        :param expected_type: Clause type the candidate class handles
        :param filter_clause: Candidate class returned on a match
        :return: ``filter_clause`` on a match, ``None`` when the key is missing or the type differs
        """
        if 'type' in data:
            detected_type = EnumHelper.from_value_safe(FilterClauseType, data['type'])
            if detected_type == expected_type:
                return filter_clause

        return None
class GroupFilterClause(FilterClause):
    """Filter clause grouping several filter clauses under one logical operator"""

    logical_operator: LogicalOperator = EnumType(LogicalOperator, required=True)
    """ Logical operator connecting clauses"""

    clauses: List[FilterClause] = ListType(
        PolyModelType(['GroupFilterClause', TaxonValueFilterClause, TaxonTaxonFilterClause, TaxonArrayFilterClause]),
        required=True,
        min_size=1,
    )
    """ List of clauses """

    negate: bool = BooleanType(default=False)
    """ Negate the whole group of clauses """

    def generate(self, ctx, query: Select, taxon_model_info_map: Dict[str, TaxonModelInfo]) -> ClauseElement:
        """
        Build the SQL expression for the whole group.

        Each nested clause is generated, the results are combined by the function registered
        for the logical operator, and the combination is wrapped via ``self_group()``.
        When ``negate`` is set, the grouped expression is wrapped in ``not_``.

        :raises UnknownOperator: when no function is registered for the logical operator
        """
        if self.logical_operator not in GROUP_OPERATORS_FUNCTIONS:
            raise UnknownOperator(self)

        combine = GROUP_OPERATORS_FUNCTIONS[self.logical_operator]
        # kept as a lazy generator handed over as one argument, exactly as the combiner expects
        generated_clauses = (
            sub_clause.generate(ctx, query, taxon_model_info_map) for sub_clause in self.clauses
        )
        grouped = combine(generated_clauses).self_group()

        return not_(grouped) if self.negate else grouped

    def get_taxon_slugs(self) -> Set[str]:
        """Collect the taxon slugs of all nested clauses into one set."""
        slugs: Set[str] = set()
        for sub_clause in self.clauses:
            slugs.update(sub_clause.get_taxon_slugs())
        return slugs

    @staticmethod
    def _claim_polymorphic(data: Dict[str, Any]) -> Optional[Type['FilterClause']]:
        """Claim ``data`` for this class when its type resolves to ``FilterClauseType.GROUP``."""
        claimed_class = FilterClause._detect_filter_clause_type(
            data,
            FilterClauseType.GROUP,
            GroupFilterClause,
        )
        return claimed_class
示例#9
0
 class ModelWithEnum(SchematicsModel):
     """Schematics model with two ``EnumType`` fields over ``MyEnum``: one without and one with a default."""

     # Enum field declared without a default.
     e = EnumType(MyEnum)
     # Enum field (named ``default``) whose default value is ``MyEnum.b``.
     default = EnumType(MyEnum, default=MyEnum.b)
示例#10
0
class TaxonDataOrder(schematics.Model):
    """Schematics model pairing a taxon with an order type; both fields are required."""

    # Taxon the ordering applies to; must be a non-empty string (``min_length=1``).
    taxon: str = StringType(required=True, min_length=1)
    # Order type, one of the ``TaxonOrderType`` members.
    type: TaxonOrderType = EnumType(TaxonOrderType, required=True)
示例#11
0
class ComparisonConfig(schematics.Model):
    """Schematics model of a comparison configuration: an optional taxon list and a comparison scope."""

    # Optional list of taxon expressions; each item is validated as a non-empty string.
    taxons: Optional[List[TaxonExpressionStr]] = ListType(NonEmptyStringType)
    # Comparison scope; falls back to ``ComparisonScopeType.company`` when not provided.
    scope: ComparisonScopeType = EnumType(ComparisonScopeType,
                                          default=ComparisonScopeType.company)
示例#12
0
class ModelJoin(SchematicsModel):
    """
    Schematics model of a join from the model it is defined on to another model
    (``to_model``), with join type, relationship, optional direction and join taxons.
    """

    join_type: JoinType = EnumType(JoinType, required=True)
    relationship: Relationship = EnumType(Relationship, required=True)
    direction: Optional[JoinDirection] = EnumType(JoinDirection, required=False)
    """
    Allows to explicitly define in which direction a join edge can be traversed.
    ModelJoin is defined on a model and references to a model.
    If direction is not defined, system will use relationship type to infer the allowed direction (for backward
    compatibility issues)
    - 'both', the join edge can be traversed from both models (defined and referenced)
    - 'outgoing', the join edge can be traversed from defined model only
    - 'incoming', the join edge can be traversed from referenced model to defined model only

    """

    to_model = NonEmptyStringType(required=True)

    taxons: Optional[List[str]] = ListType(NonEmptyStringType)
    """
    List of taxons on which the two models should be joined.
    Later, we can set more customizable joins, even joining different taxons on each other.
    """

    @memoized_property
    def join_type_memoized(self):
        """
        Memoized accessor for ``join_type``; the value is kept on the instance once it has been read.
        :return: value of ``join_type``
        """
        join_type_value = self.join_type
        return join_type_value

    @memoized_property
    def relationship_memoized(self):
        """
        Memoized accessor for ``relationship``; the value is kept on the instance once it has been read.
        :return: value of ``relationship``
        """
        relationship_value = self.relationship
        return relationship_value

    @memoized_property
    def direction_memoized(self):
        """
        Memoized accessor for ``direction``; the value is kept on the instance once it has been read.
        :return: value of ``direction``
        """
        direction_value = self.direction
        return direction_value

    @memoized_property
    def to_model_memoized(self):
        """
        Memoized accessor for ``to_model``; the value is kept on the instance once it has been read.
        :return: value of ``to_model``
        """
        target_model = self.to_model
        return target_model

    @memoized_property
    def taxons_memoized(self):
        """
        Memoized accessor for ``taxons``; the value is kept on the instance once it has been read.
        :return: value of ``taxons``
        """
        join_taxons = self.taxons
        return join_taxons
示例#13
0
class HuskyModel(SchematicsModel):
    """
    Schematics model describing a Husky model: its attributes, joins, data source,
    visibility, ownership (company / project) and fully qualified database name.
    """

    name: str = StringType(required=True, min_length=3)

    # Callable default: every instance gets its own dict instead of sharing one object
    # created at class-definition time (the dict is mutated by add_attribute / remove_attribute).
    attributes: Dict[str, ModelAttribute] = DictType(ModelType(ModelAttribute), default=dict)
    """
    Key is name of the attribute. For usage, use attributes_memoized or attributes_by_taxon_memoized.
    """

    # Callable default for the same reason: add_join appends to this list in place.
    joins: List[ModelJoin] = ListType(ModelType(ModelJoin), default=list)
    """
    List of possible joins on other models
    """

    data_sources: List[str] = ListType(StringType(min_length=3), default=list, max_size=1)
    """
    Explicitly defined data sources.
    """

    model_type: Optional[HuskyModelType] = EnumType(HuskyModelType)
    """
    Optional attribute which defines type of the model explicitly
    """

    visibility: ModelVisibility = EnumType(ModelVisibility, default=ModelVisibility.hidden)

    company_id: str = NonEmptyStringType(required=True)

    project_id: Optional[str] = NonEmptyStringType(required=False)

    _alias: Optional[str] = None
    """
    Unique alias used in SQL for this model (if None, use full object name to reference columns)
    """

    fully_qualified_name_parts: Optional[List[str]] = ListType(NonEmptyStringType(required=True))
    """
    All parts of the fully qualified name. Can contain 2..N values, depending on the actual federated database.

    Example:
    - physical data source (always first)
    - database name
    - schema name
    - table name
    """

    def __repr__(self):
        """Delegate the textual representation to ``serialize_class_with_props``."""
        return serialize_class_with_props(self)

    @property
    def number_of_identifiers(self) -> int:
        """Number of attributes flagged as identifiers."""
        return len(self.identifier_attributes)

    @property
    def identifier_attributes(self) -> Set[ModelAttribute]:
        """Set of attributes whose (memoized) ``identifier`` flag is truthy."""
        return {attr for attr in self.attributes_memoized.values() if attr.identifier_memoized}

    @property
    def identifier_taxon_slugs(self) -> Set[str]:
        """Taxon slugs of all identifier attributes."""
        return {attr.taxon for attr in self.identifier_attributes}

    @property
    def physical_data_source(self) -> str:
        """
        Gets name of physical data source (first part of the fully qualified name).

        :raises ValueError: when ``fully_qualified_name_parts`` is not set
        """
        if self.fully_qualified_name_parts is None:
            raise ValueError('Missing physical data source')
        else:
            return self.fully_qualified_name_parts[0]

    @memoized_property
    def time_granularity(self) -> Optional[TimeGranularity]:
        """
        Time granularity of model's data (if it can be inferred)

        Day granularity is reported when the model has the date taxon, hour granularity
        when it has the date-hour taxon (both prefixed with the model's data source);
        otherwise ``None``.
        """

        date_taxon_slug = prefix_with_virtual_data_source(self.data_source, TaxonSlugs.DATE)
        date_hour_taxon_slug = prefix_with_virtual_data_source(self.data_source, TaxonSlugs.DATE_HOUR)
        if self.has_taxon(date_taxon_slug):
            return TimeGranularity.day
        elif self.has_taxon(date_hour_taxon_slug):
            return TimeGranularity.hour

        return None

    def full_object_name(self, ctx: HuskyQueryContext) -> str:
        """
        Full name of the database object, including db and schema name.

        The first part of ``fully_qualified_name_parts`` (the physical data source) is skipped;
        the remaining parts are quoted for ``ctx.dialect`` and joined with dots.

        :param ctx: Husky query context providing the SQL dialect
        :raises GenericModelException: when no name parts remain after the physical data source
        """
        assert self.fully_qualified_name_parts

        full_object_name = '.'.join(
            [quote_identifier(part, ctx.dialect) for part in self.fully_qualified_name_parts[1:]]
        )

        # sanity check that we have ANY name
        if not full_object_name:
            raise GenericModelException(
                'You are working with federated model so you need to turn on the appropriate feature flag',
                self.name,
                ExceptionErrorCode.FDQ_FLAG_REQUIRED,
            )

        return full_object_name

    @property
    def table_alias(self) -> Optional[str]:
        """
        Optional table alias to keep reference to the model unique (in case it is joined multiple times in query)
        """
        return self._alias

    @table_alias.setter
    def table_alias(self, alias: str):
        """
        Setter for optional table alias

        :param alias: New table alias
        """
        self._alias = alias

    @table_alias.deleter
    def table_alias(self):
        """
        Removes table alias
        """
        self._alias = None

    def unique_object_name(self, ctx: HuskyQueryContext) -> str:
        """
        Unique model reference within query: the table alias when set, the full object name otherwise.

        :param ctx: Husky query context
        """
        identifier = self.full_object_name(ctx) if self.table_alias is None else self.table_alias
        return identifier

    @property
    def graph_name(self) -> str:
        """
        Unique name in graph: the table alias when set, the model name otherwise.
        """
        return self.name if self.table_alias is None else self.table_alias

    @property
    def is_entity(self) -> bool:
        """
        Returns if the model is entity or not.

        Uses the explicit ``model_type`` when set; otherwise derived from model name at runtime.
        """
        if self.model_type is not None:
            return self.model_type is HuskyModelType.ENTITY

        return 'entity' in self.name.lower()

    @memoized_property
    def data_source(self) -> str:
        """
        Returns data source. Newly, all models have exactly one data source.
        """
        return self.data_sources[0]

    def get_attribute_by_taxon(self, taxon_slug: str) -> ModelAttribute:
        """
        Look up the attribute mapped to the given taxon slug.

        :param taxon_slug: Taxon slug to look up
        :raises AttributeNotFound: when no (truthy) attribute is registered for the slug
        """
        attribute = self.attributes_by_taxon_memoized.get(taxon_slug)
        if attribute:
            return attribute
        else:
            raise AttributeNotFound(f'Attribute with taxon {taxon_slug} not found.')

    def has_taxon(self, taxon: str) -> bool:
        """Tell whether ``get_attribute_by_taxon`` finds an attribute for the taxon."""
        try:
            self.get_attribute_by_taxon(taxon)
            return True
        except AttributeNotFound:
            return False

    @memoized_property
    def attributes_memoized(self):
        """
        Note the memoized property. It is cached on the instance after the first access.
        :return: value of ``attributes``
        """
        return self.attributes

    @memoized_property
    def attributes_by_taxon_memoized(self) -> Dict[str, ModelAttribute]:
        """
        Note the memoized property. It is cached on the instance after the first access.
        :return: attributes keyed by their taxon
        """
        return {attr.taxon_memoized: attr for attr in self.attributes_memoized.values()}

    @memoized_property
    def joins_memoized(self):
        """
        Note the memoized property. It is cached on the instance after the first access.
        :return: value of ``joins``
        """
        return self.joins

    @property
    def taxons(self) -> Set[str]:
        """Set of all taxon slugs that have an attribute on this model."""
        return set(self.attributes_by_taxon_memoized.keys())

    def taxon_sql_accessor(
        self,
        ctx: HuskyQueryContext,
        taxon_slug: str,
        cast_array: bool = False,
        model_tel_dialect: Optional[ModelTelDialect] = None,
    ) -> str:
        """
        Helper function that returns full sql accessor to given taxon on the model

        :param ctx:                     Husky query context
        :param taxon_slug               Original taxon slug
        :param cast_array               Automatically handle arrays by casting them to string (default is False)
        :param model_tel_dialect        Initialized model TEL dialect, if there is one (we use it to check for cyclic reference).
        :raises AttributeNotFound:      when the model has no attribute for the taxon slug

        """
        attribute = self.get_attribute_by_taxon(taxon_slug)

        # let TEL grammar to render the SQL transformation
        # on purpose, we dont use 'column' variable here, because we dont really rely on column_name attribute here
        tel_dialect = model_tel_dialect
        if tel_dialect is None:
            # no initialized tel visitor is provided so create a generic one
            tel_dialect = ModelTelDialect(
                unique_object_name=self.unique_object_name(ctx),
                virtual_data_source=self.data_sources[0],
                model=self,
            )

        # render the TEL transformation
        parsed_expression = tel_dialect.render(attribute.tel_transformation, ctx, {})
        sql_accessor = compile_query(parsed_expression.sql(ctx.dialect), ctx.dialect)

        # we cast arrays to varchar, if requested
        if cast_array and attribute.quantity_type is ValueQuantityType.array:
            sql_accessor = f'CAST({sql_accessor} AS VARCHAR)'

        return sql_accessor

    def __hash__(self):
        """Hash the model by its unique object name resolved with ``SNOWFLAKE_HUSKY_CONTEXT``."""
        return hash(self.unique_object_name(SNOWFLAKE_HUSKY_CONTEXT))

    def add_attribute(self, model_attribute: ModelAttribute):
        """
        Adds attribute to a model. It should be used only in very edge-cases.

        One of the use cases is dynamically adding attributes when working with normalized values.

        The attribute is stored under its taxon in ``attributes`` and in both memoized views,
        so that already-cached lookups see it as well.

        :param model_attribute: Model attribute
        """
        self.attributes[model_attribute.taxon] = model_attribute
        self.attributes_memoized[model_attribute.taxon] = model_attribute
        self.attributes_by_taxon_memoized[model_attribute.taxon] = model_attribute

    def remove_attribute(self, taxon_slug: str):
        """
        Removes attribute from model by taxon slug; a missing slug is ignored.

        :param taxon_slug: Taxon.slug
        """
        self.attributes.pop(taxon_slug, None)
        self.attributes_memoized.pop(taxon_slug, None)
        self.attributes_by_taxon_memoized.pop(taxon_slug, None)

    def add_join(self, model_join: ModelJoin):
        """
        Appends a join to the model's list of joins.

        :param model_join: Join to add
        """
        self.joins.append(model_join)
示例#14
0
class BlendingQueryInfo(schematics.Model):
    """
    Query info object for blending requests.
    """

    uuid: str = StringType(required=True)

    start_time: datetime = DateTimeType()
    """
    Timestamp of the moment when Husky started processing the blending request
    """

    status: str = StringType(choices=['success', 'fail'], default='success')
    """
    Status of the request
    """

    error: Optional[str] = StringType()
    """
    Traceback to the error
    """

    data_request: BlendingDataRequest = ModelType(BlendingDataRequest)
    """
    Full API request
    """

    origin_information: Optional[Dict[str, Any]] = DictType(BaseType)
    """
    Contains additional (and optional) information about request origin
    """

    # Callable defaults below: each query info gets its own fresh container / model instance
    # instead of sharing a single object created once at class-definition time.
    subrequests_info: List[QueryInfo] = ListType(ModelType(QueryInfo), default=list)
    """
    List of QueryInfo objects for subrequests
    """

    comparison_subrequests_info: List[QueryInfo] = ListType(ModelType(QueryInfo), default=list)
    """
    List of QueryInfo objects for comparison subrequests
    """

    internal_metrics: QueryInternalMetrics = ModelType(QueryInternalMetrics, default=QueryInternalMetrics)
    """
    Internal measurements.
    Mostly timings and counts
    """

    definition: QueryDefinition = ModelType(QueryDefinition, default=QueryDefinition)
    """
    Definition of the Query
    Similar to actual API request, but should have more stable structure, and be independent of API changes
    """

    original_request_str: str = StringType(required=False)
    """
    Original request as it came to API serialized into string.
    """

    query_runtime: HuskyQueryRuntime = EnumType(HuskyQueryRuntime)

    @staticmethod
    def create(
        data_request: BlendingDataRequest,
        husky_context: HuskyQueryContext,
        origin_information: Optional[Dict[str, Any]] = None,
    ):
        """
        Create a query info object for a blending request.

        The values are handed over as ``trusted_data``; ``start_time`` is set to the current
        UTC time and ``query_runtime`` is taken from the Husky context.

        :param data_request: Blending data request being processed
        :param husky_context: Husky query context supplying the query runtime
        :param origin_information: Optional information about request origin
        :return: New ``BlendingQueryInfo`` instance
        """
        return BlendingQueryInfo(
            trusted_data=dict(
                data_request=data_request,
                start_time=datetime.utcnow(),
                origin_information=origin_information,
                query_runtime=husky_context.query_runtime,
            )
        )