Пример #1
0
    def from_spec(cls,
                  registry,
                  unit_type,
                  measures=None,
                  segment_by=None,
                  where=None,
                  **opts):
        """
        Build an evaluation strategy for `unit_type` from a declarative spec.

        The requested measures, segmenting dimensions and constraint
        dimensions are resolved against `registry`, split into those that
        belong to `unit_type` directly and those reached via another unit
        type, and turned into one `cls` instance per provision returned by
        `registry._find_optimal_provision`. Features reached via another unit
        type are handled by calling `from_spec` recursively for that unit
        type and joining the resulting strategy in.

        Args:
            registry: Object supplying the `_resolve_identifier`,
                `_resolve_measure`, `_resolve_dimension`,
                `_resolve_foreign_key`, `_resolve_reverse_foreign_key`,
                `reverse_foreign_keys_for_unit` and `_find_optimal_provision`
                helpers used below.
            unit_type: Unit type specification; resolved through
                `registry._resolve_identifier`.
            measures: Measure specifications to resolve; `None` is treated as
                an empty list.
            segment_by: Dimension specifications to resolve; `None` is treated
                as an empty list.
            where: Constraint specification, passed to `Constraint.from_spec`.
            **opts: Not read here; forwarded unchanged to the recursive
                `from_spec` calls.

        Returns:
            The strategy built for the first provision, with the strategies
            for any remaining provisions joined onto it on `unit_type`, and
            with `where.scoped_applicable` and-ed into its `where`.

        Raises:
            RuntimeError: If a strategy for a foreign unit type cannot be
                attached to any of the strategies built for `unit_type`.
        """

        # Step 0: Resolve applicable measures and dimensions
        unit_type = registry._resolve_identifier(unit_type)
        measures = [] if measures is None else measures
        segment_by = [] if segment_by is None else segment_by

        measures = [
            registry._resolve_measure(unit_type, measure)
            for measure in measures
        ]

        segment_by = [
            registry._resolve_dimension(unit_type, dimension)
            for dimension in segment_by
        ]

        where = Constraint.from_spec(where)
        # Dimensions that the constraint (scoped to this unit type) depends on
        # but which were not requested in `segment_by`; they are resolved and
        # taken in their `as_implicit` form.
        where_dimensions = [
            (registry._resolve_dimension(unit_type, dimension).as_implicit)
            for dimension in where.scoped_for_unit_type(unit_type).dimensions
            if dimension not in segment_by
        ]

        # Step 1: Collect measures and dimensions into groups based on current unit_type
        # and next unit_type

        current_evaluation = DimensionBundle(unit_type=unit_type,
                                             dimensions=[],
                                             measures=[])
        # Maps the next unit type to the bundle of features to evaluate there.
        next_evaluations = {}

        def collect_dimensions(dimensions,
                               kind='measures',
                               for_constraint=False):
            # `kind` selects which list of a bundle ('measures' or
            # 'dimensions') receives each feature.
            # NOTE(review): the appends below go through `_asdict()`, and the
            # bundles' lists are read back later, so this relies on
            # `_asdict()` exposing the bundle's own list objects (presumably
            # DimensionBundle is a namedtuple - confirm).
            for dimension in dimensions:
                if not dimension.via:
                    # Feature belongs to the current unit type.
                    current_evaluation._asdict()[kind].append(dimension)
                elif (  # Handle reverse foreign key joins
                    (for_constraint or kind == 'measures')
                        and dimension.next_unit_type
                        in registry.reverse_foreign_keys_for_unit(unit_type)):
                    next_unit_type = registry._resolve_reverse_foreign_key(
                        unit_type, dimension.next_unit_type)
                    if next_unit_type not in next_evaluations:
                        # The bundle records the *current* unit type here;
                        # Step 3 detects reverse joins by the mismatch between
                        # the dict key and the bundle's `unit_type`.
                        next_evaluations[next_unit_type] = DimensionBundle(
                            unit_type=unit_type, dimensions=[], measures=[])
                    next_evaluations[next_unit_type]._asdict()[kind].append(
                        dimension.via_next)
                else:
                    # Forward foreign key: key and bundle unit type coincide.
                    next_unit_type = registry._resolve_foreign_key(
                        unit_type, dimension.next_unit_type)
                    if next_unit_type not in next_evaluations:
                        next_evaluations[next_unit_type] = DimensionBundle(
                            unit_type=next_unit_type,
                            dimensions=[],
                            measures=[])
                    next_evaluations[next_unit_type]._asdict()[kind].append(
                        dimension.via_next)

        collect_dimensions(measures, kind='measures')
        collect_dimensions(segment_by, kind='dimensions')
        collect_dimensions(where_dimensions,
                           kind='dimensions',
                           for_constraint=True)

        # Add required dimension for joining in next unit_types
        for dimension_bundle in next_evaluations.values():
            fk = registry._resolve_foreign_key(unit_type,
                                               dimension_bundle.unit_type)
            if fk not in current_evaluation.dimensions:
                # Added in its `as_private` form since it was not requested.
                current_evaluation.dimensions.append(fk.as_private)

        # Step 2: Create optimal joins for current unit_type

        provisions = registry._find_optimal_provision(
            unit_type=unit_type,
            measures=current_evaluation.measures,
            dimensions=current_evaluation.dimensions)

        evaluations = []
        for provision in provisions:
            generic_constraints = where.generic_for_provider(
                provision.provider)
            # Dimensions needed by the provider-generic constraints that the
            # provision does not already supply; added in `as_private` form.
            generic_constraint_dimensions = [
                provision.provider.resolve(dimension).as_private
                for dimension in generic_constraints.dimensions
                if not provision.dimensions
                or dimension not in provision.dimensions
            ]
            evaluations.append(
                cls(registry=registry,
                    provider=provision.provider,
                    unit_type=unit_type,
                    measures=provision.measures,
                    segment_by=provision.dimensions +
                    generic_constraint_dimensions,
                    where=generic_constraints,
                    join_prefix=provision.join_prefix))

        # Step 3: For each next unit_type, recurse problem and join into above query

        for foreign_key, dim_bundle in next_evaluations.items():
            foreign_strategy = cls.from_spec(registry=registry,
                                             unit_type=foreign_key,
                                             measures=dim_bundle.measures,
                                             segment_by=dim_bundle.dimensions,
                                             where=where.via_next(
                                                 foreign_key.name),
                                             **opts)

            if foreign_key != dim_bundle.unit_type:  # Reverse foreign key join
                # Join on the current unit type instead of the foreign one.
                foreign_key = dim_bundle.unit_type
                foreign_strategy.unit_type = dim_bundle.unit_type

            added = False
            # NOTE(review): `break` leaves only the inner loop, so every
            # sub-strategy exposing a matching identifier receives the join,
            # not just the first one - confirm this is intended.
            for sub_strategy in evaluations:
                for dimension in sub_strategy.segment_by:
                    if isinstance(dimension, _StatisticalUnitIdentifier
                                  ) and dimension.matches(foreign_key):
                        sub_strategy.add_join(foreign_key, foreign_strategy)
                        added = True
                        break
            if not added:
                raise RuntimeError("Could not add foreign strategy: {}".format(
                    foreign_strategy))

        # NOTE(review): indexing assumes at least one provision was found; an
        # empty `evaluations` list raises IndexError here.
        strategy = evaluations[0]
        for sub_strategy in evaluations[1:]:
            strategy.add_join(unit_type, sub_strategy)

        strategy.where = And.from_operands(strategy.where,
                                           where.scoped_applicable)

        # Step 4: Mark any resolved where dependencies as private, unless otherwise
        # requested in `segment_by`

        # Entries are replaced in place by index, so the list length does not
        # change while it is being iterated.
        for dimension in strategy.segment_by:
            if dimension.implicit and dimension in where.scoped_applicable.dimensions:
                index = strategy.segment_by.index(dimension)
                strategy.segment_by[index] = strategy.segment_by[
                    index].as_private

        # Step 5: Return EvaluationStrategy, and profit.

        return strategy
Пример #2
0
    def _compat_fields_split(self,
                             measures,
                             segment_by,
                             where,
                             joins_post=None):
        """
        This method splits measures and segment_by dictionaries into two,
        corresponding to pre- and post- computation. The pre- field modify
        private statuses to prevent loss of join keys, and suppress
        external fields in joins_post. The second set are remove all features
        that were private in the pre- computation phase.

        It also splits where constraints such that constraints are applied
        as early as possible while still being semantically correct.
        """
        if len(joins_post) == 0:
            return measures, segment_by, where, None, None, None

        join_post_fields = []  # TODO: Use dictionaries for performance
        for join in joins_post:
            join_post_fields.extend(
                [m.as_via(join.join_prefix) for m in join.measures])
            join_post_fields.extend(
                [d.as_via(join.join_prefix) for d in join.dimensions])

        join_left_post_keys = list(
            itertools.chain(*[  # TODO: Use dictionaries for performance
                join.left_on for join in joins_post
            ]))

        join_right_post_keys = list(
            itertools.chain(*[  # TODO: Use dictionaries for performance
                join.right_on for join in joins_post
            ]))

        # Process constraint clauses
        where_pre = []
        where_post = []

        def add_constraint(op):
            if len(
                    set(op.dimensions).intersection([
                        d if isinstance(d, str) else d.via_name
                        for d in (join_post_fields + join_right_post_keys)
                    ])) > 0:
                where_post.append(op)
            else:
                where_pre.append(op)

        if where:
            if where.kind is CONSTRAINTS.AND:
                for op in where.operands:
                    add_constraint(op)
            else:
                add_constraint(where)

        where_pre = And.from_operands(where_pre)
        where_post = And.from_operands(where_post)

        # Process measures and dimensions
        def features_split(features, extra_public_keys=[]):
            pre = {}
            post = {}

            for feature in features:
                if feature.external and feature in join_post_fields:
                    post[feature] = feature
                    continue
                if feature.private and feature in (
                        join_left_post_keys + extra_public_keys +
                    (where_post.dimensions if where_post else [])):
                    pre[feature.as_public] = feature.as_public
                else:
                    pre[feature] = feature
                if not pre[feature].private:
                    post[feature] = feature

            return pre, post

        measures_pre, measures_post = features_split(
            measures,
            [self.resolve(unit_type=None, features='count', role='dimension')])
        segment_by_pre, segment_by_post = features_split(segment_by)

        return measures_pre, segment_by_pre, where_pre, measures_post, segment_by_post, where_post