Пример #1
0
    def _features_lookup(self, unit_type, kind, attr_filter=None):
        """
        Collect the features of one kind that are available for a unit type.

        Parameters:
            unit_type: The unit type of interest; it is normalised through
                `self.identifier_for_unit` before use.
            kind (str): One of 'foreign_key', 'reverse_foreign_key',
                'dimension' or 'measure'. Selects the store named
                `kind + 's'` on `self._cache`.
            attr_filter (callable, None): Optional predicate called with each
                candidate feature; candidates for which it returns a falsy
                value are skipped.

        Returns:
            SequenceMap: One `_ResolvedFeature` per distinct feature found
                under any cached unit type that matches `unit_type`.
        """
        assert kind in ('foreign_key', 'reverse_foreign_key', 'dimension',
                        'measure')

        unit_type = self.identifier_for_unit(unit_type)
        store = getattr(self._cache, kind + 's')
        is_key_kind = kind in ('foreign_key', 'reverse_foreign_key')

        found = SequenceMap()
        for candidate in store:
            if not candidate.matches(unit_type):
                continue
            for feature, instances in store[candidate].items():
                # The first occurrence of a feature wins; later ones are
                # ignored.
                if feature in found:
                    continue
                if attr_filter and not attr_filter(feature):
                    continue
                # A key whose name equals the cached unit type is masked with
                # the name of the requested unit type.
                if is_key_kind and candidate == feature.name:
                    mask = unit_type.name
                else:
                    mask = None
                found.append(
                    _ResolvedFeature(
                        feature.name,
                        providers=[d.provider for d in instances],
                        unit_type=unit_type,
                        mask=mask,
                        kind=kind))
        return found
Пример #2
0
    def measures_for_unit(self, unit_type):
        """
        Return the measures available for `unit_type`.

        When `self.identifier_for_unit` yields `None` for the given unit type,
        `self.measures` is returned unfiltered. Otherwise a new `SequenceMap`
        is built from the measures accepted by `self._unit_has_measure`.
        """
        unit_type = self.identifier_for_unit(unit_type)
        if unit_type is None:
            return self.measures

        selected = SequenceMap()
        for candidate in self.measures:
            if not self._unit_has_measure(unit_type, candidate):
                continue
            selected.append(candidate)
        return selected
Пример #3
0
    def dimensions_for_unit(self, unit_type, include_partitions=True):
        """
        Return the dimensions available for `unit_type`.

        Parameters:
            unit_type: The unit type of interest; normalised through
                `self.identifier_for_unit`.
            include_partitions (bool): When falsy, dimensions whose
                `partition` attribute is truthy are left out.

        Returns:
            `self.dimensions` unfiltered when the normalised unit type is
            `None` (the partition flag is not applied in that case), otherwise
            a new `SequenceMap` of the dimensions accepted by
            `self._unit_has_dimension`.
        """
        unit_type = self.identifier_for_unit(unit_type)
        if unit_type is None:
            return self.dimensions

        selected = SequenceMap()
        for candidate in self.dimensions:
            if not self._unit_has_dimension(unit_type, candidate):
                continue
            if not include_partitions and candidate.partition:
                continue
            selected.append(candidate)
        return selected
Пример #4
0
    def foreign_keys_for_unit(self, unit_type):
        """
        Return the identifiers usable as foreign keys for `unit_type`.

        When `self.identifier_for_unit` yields `None`, `self.identifiers` is
        returned unfiltered. Otherwise a new `SequenceMap` is built from the
        identifiers accepted by `self._unit_has_foreign_key`; an identifier
        that compares equal to the unit type's name is replaced by a copy
        carrying the unit type's mask.
        """
        unit_type = self.identifier_for_unit(unit_type)
        if unit_type is None:
            return self.identifiers

        keys = SequenceMap()
        for candidate in self.identifiers:
            if not self._unit_has_foreign_key(unit_type, candidate):
                continue
            is_own_identifier = unit_type.name == candidate
            keys.append(
                candidate.with_mask(unit_type.mask)
                if is_own_identifier else candidate)
        return keys
Пример #5
0
 def _get_dimensions_from_specs(self, cls, specs):
     """
     Build a `SequenceMap` of features from a list of specs.

     Each spec is turned into a feature with `cls.from_spec(spec,
     provider=self)` and stored under itself as the key. A `specs` value of
     `None` yields an empty `SequenceMap`.
     """
     built = SequenceMap()
     for spec in (specs if specs is not None else ()):
         feature = cls.from_spec(spec, provider=self)
         built[feature] = feature
     return built
Пример #6
0
    def __init__(self, registry, provider, unit_type, measures, segment_by=None,
                 where=None, join_on_left=None, join_on_right=None,
                 join_prefix=None, joins=None):
        """
        Store the evaluation target and its join configuration.

        Parameters:
            registry: Stored as `self.registry`.
            provider: Stored as `self.provider`.
            unit_type: The statistical unit used for evaluation.
            measures: Anticipated measures; a falsy value becomes an empty
                `SequenceMap`.
            segment_by: Anticipated segmentations; a falsy value becomes an
                empty `SequenceMap`.
            where: Anticipated constraints, stored as given.
            join_on_left: Stored as given.
            join_on_right: When falsy, defaults to a one-element list holding
                `self.matched_unit_type.name`.
            join_prefix: When falsy, defaults to `self.unit_type.name`.
            joins: When falsy, defaults to an empty list.
        """
        self.registry = registry
        self.provider = provider

        # Statistical unit used for evaluation
        self.unit_type = unit_type

        # Anticipated measures, segmentations and constraints
        self.measures = SequenceMap(measures if measures else [])
        self.segment_by = SequenceMap(segment_by if segment_by else [])
        self.where = where

        # Join parameters
        self.is_joined = False
        self.join_is_compatible = True
        self.join_on_left = join_on_left
        if join_on_right:
            self.join_on_right = join_on_right
        else:
            # NOTE(review): `matched_unit_type` is defined outside this block;
            # it presumably depends on attributes assigned above, so this must
            # stay after them.
            self.join_on_right = [self.matched_unit_type.name]
        self.joins = joins if joins else []
        self.join_prefix = join_prefix if join_prefix else self.unit_type.name
Пример #7
0
    def resolve(self, unit_type, features, role=None, with_attrs=None):
        """
        Resolve one or more features, optionally tied to a unit type and role.

        This method is concerned with *functional* resolution, so if
        `role='dimension'` both identifiers and measures will be resolved,
        since they can be used as dimensions.

        Parameters:
            unit_type (str, None): A unit type for which the resolution should
                be done.
            features: A single feature or a list/`SequenceMap` of features.
                Each entry may also be a `(name, kwargs)` tuple or a dict of
                keyword arguments, both of which are turned into a
                `FeatureSpec`, or a `FeatureSpec` itself.
            role (str, None): One of 'measure', 'dimension', 'identifier' or
                `None`.
            with_attrs (dict, None): Attributes to set on the returned
                features. These are *additive* to any attributes already
                carried by the feature; attributes coming from a
                `FeatureSpec` override same-named entries of `with_attrs`.

        Returns:
            The single resolved feature when `features` was not a list or
            `SequenceMap`, otherwise a `SequenceMap` of resolved features.

        Raises:
            ValueError: If any feature could not be resolved; the message
                lists every feature that failed.
        """
        single = not isinstance(features, (list, SequenceMap))
        if single:
            features = [features]

        failed = []
        resolved = SequenceMap()
        for feature in features:
            try:
                attrs = with_attrs.copy() if with_attrs else {}
                # Normalise the shorthand forms into a FeatureSpec first.
                if isinstance(feature, tuple):
                    feature = FeatureSpec(feature[0], **feature[1])
                if isinstance(feature, dict):
                    feature = FeatureSpec(**feature)
                if isinstance(feature, FeatureSpec):
                    feature, spec_attrs = feature.as_source_with_attrs(
                        unit_type)
                    attrs.update(spec_attrs)
                outcome = self._resolve(unit_type=unit_type,
                                        feature=feature,
                                        role=role)._with_attrs(**attrs)
                resolved[outcome] = outcome
            except ValueError:
                # Keep going so that every failure is reported at once.
                failed.append(feature)

        if failed:
            failed_names = "', '".join(str(item) for item in failed)
            raise ValueError(
                "Could not resolve {}(s) associated with unit_type '{}' for: '{}'"
                .format(role or 'feature', repr(unit_type), failed_names))

        return resolved.first if single else resolved
Пример #8
0
        def wrapped(self,
                    unit_type,
                    measures=None,
                    segment_by=None,
                    where=None,
                    joins=None,
                    stats=True,
                    covariates=False,
                    context=None,
                    stats_registry=None,
                    **opts):
            """
            Normalise the evaluation arguments and delegate to `f`.

            The unit type is turned into an identifier, measures and
            segmentations are resolved against it (a single string or
            `_ProvidedFeature` is treated as a one-element list, `None` as an
            empty `SequenceMap`), `where` is parsed into a `Constraint`, and
            falsy `joins`, `stats_registry` and `context` receive their
            defaults before `f` is called.
            """
            unit_type = self.identifier_for_unit(unit_type)

            if isinstance(measures, (str, _ProvidedFeature)):
                measures = [measures]
            if measures is None:
                measures = SequenceMap()
            else:
                measures = self.resolve(unit_type=unit_type,
                                        features=measures,
                                        role='measure')

            if isinstance(segment_by, (str, _ProvidedFeature)):
                segment_by = [segment_by]
            if segment_by is None:
                segment_by = SequenceMap()
            else:
                segment_by = self.resolve(unit_type=unit_type,
                                          features=segment_by,
                                          role='dimension')

            where = Constraint.from_spec(where)
            if not joins:
                joins = []
            if not stats_registry:
                stats_registry = global_stats_registry
            if not context:
                context = {}

            # NOTE: processing of `opts` via `self.opts.process(**opts)` is
            # currently disabled; options are passed through untouched.
            return f(self,
                     unit_type,
                     measures=measures,
                     segment_by=segment_by,
                     where=where,
                     joins=joins,
                     stats=stats,
                     covariates=covariates,
                     context=context,
                     stats_registry=stats_registry,
                     **opts)
Пример #9
0
    def _evaluate(self, unit_type, measures, segment_by, where, joins, stats,
                  covariates, context, stats_registry, **opts):
        """
        Evaluate measures and dimensions against `self.data`.

        A dataframe is assembled with one column per segmentation or
        constraint dimension and one per measure, each computed by evaluating
        the feature's `expr` against the raw data (to which a constant `count`
        column of 1 is added). The result is handed to
        `self._finalise_dataframe`.

        `joins`, `covariates`, `context` and `opts` are accepted but not used
        in this method.
        """
        assert stats_registry is not None
        assert not any(measure.external for measure in measures)
        assert not any(dimension.external for dimension in segment_by)

        rebase_agg = not unit_type.is_unique
        # Field names carry the unit type only when no rebase is required.
        field_unit_type = None if rebase_agg else unit_type

        raw_data = self.data.assign(count=1)

        # Dimensions referenced by the constraint but not already segmented by.
        where_dims = SequenceMap([
            self.dimensions[name] for name in where.dimensions
            if name not in segment_by
        ])

        dimension_columns = {
            dimension.fieldname(role='dimension', unit_type=field_unit_type):
            raw_data.eval(dimension.expr)
            for dimension in itertools.chain(segment_by, where_dims)
        }
        df = pd.DataFrame().assign(**dimension_columns)

        measure_columns = {
            measure.fieldname(role='measure', unit_type=field_unit_type):
            raw_data.eval(measure.expr)
            for measure in measures
        }
        df = df.assign(**measure_columns)

        return self._finalise_dataframe(df,
                                        unit_type=unit_type,
                                        measures=measures,
                                        segment_by=segment_by,
                                        where=where,
                                        stats_registry=stats_registry,
                                        stats=stats,
                                        rebase_agg=rebase_agg)
Пример #10
0
 def identifiers(self):
     """
     Return a `SequenceMap` with one identifier per unit type in the cache.

     Each key of `self._cache.identifiers` is passed through
     `self.identifier_for_unit`.
     """
     cached_unit_types = self._cache.identifiers.keys()
     return SequenceMap(
         self.identifier_for_unit(cached) for cached in cached_unit_types)
Пример #11
0
class MetaMeasureProvider(MeasureProvider):
    """
    A `MeasureProvider` subclass that acts as a host for other `MeasureProvider`
    instances, allowing evaluations of measures that span multiple providers.

    Instances of this class generate a graph of relationships between all of the
    identifiers, measures and dimensions provided by all hosted providers.
    Relationships between these features can then be extracted and used in
    various tasks, chief among which being the evaluation of measures for a
    statistical unit type segmented by various dimensions. The logic for the
    evaluation is handled by the `mensor.measures.evaluation.EvaluationStrategy`
    class.

    The graph formed by registering `MeasureProvider` instances has the
    following relationships:
        - unit_type -> foreign_key
        - unit_type <- foreign_key [-> reverse_foreign_key]
        - unit_type -> dimension
        - unit_type -> measure
    """
    class GraphCache:
        """
        The internal representation of the relationships between features
        across multiple MeasureProviders.

        Each store is a nested dict whose leaves are lists of provided
        features: `identifiers` maps unit_type -> [features], while
        `foreign_keys`, `reverse_foreign_keys`, `dimensions` and `measures`
        map unit_type -> feature -> [features].
        """
        def __init__(self,
                     providers=None,
                     identifiers=None,
                     foreign_keys=None,
                     reverse_foreign_keys=None,
                     dimensions=None,
                     measures=None):
            """Create a cache, using an empty dict for every falsy store."""
            self.providers = providers or {}
            self.identifiers = identifiers or {}
            self.foreign_keys = foreign_keys or {}
            self.reverse_foreign_keys = reverse_foreign_keys or {}
            self.dimensions = dimensions or {}
            self.measures = measures or {}

        def copy(self):
            """Return a new cache whose stores are `nested_dict_copy` copies."""
            return MetaMeasureProvider.GraphCache(
                **{
                    key: nested_dict_copy(getattr(self, key))
                    for key in [
                        'providers', 'identifiers', 'foreign_keys',
                        'reverse_foreign_keys', 'dimensions', 'measures'
                    ]
                })

        def register(self, provider):
            """
            Add every identifier of `provider`, together with the foreign
            keys, dimensions and measures it offers for that identifier, to
            this cache.
            """
            # TODO: Enforce that measures and dimensions share same namespace,
            # and never conflict with stat types
            # TODO: Ensure no contradictory key types (e.g. Two identifiers
            # primary on one table and not both primary on a secondary table)

            # Require that each provider have at least one primary key and a
            # measure "count".
            # TODO: Uncomment these checks and retain compatibility with nested
            # MetaMeasureProvider instances.
            # if len(list(identifier for identifier in provider.identifiers if identifier.is_unique)) == 0:
            #     raise RuntimeError("MeasureProvider '{}' does not have at least one unique identifier.".format(provider))
            # if 'count' not in provider.measures:
            #     raise RuntimeError("MeasureProvider '{}' does not provide a 'count' measure.".format(provider))

            for identifier in provider.identifiers:
                self.register_identifier(identifier)

                # Every identifier of the provider (including this one) is a
                # candidate key for this identifier.
                for unit_type in provider.identifiers:
                    self.register_foreign_key(identifier, unit_type)

                for dimension in provider.dimensions_for_unit(identifier):
                    self.register_dimension(identifier, dimension)

                for measure in provider.measures_for_unit(identifier):
                    self.register_measure(identifier, measure)

        def _handled_resolved_features(f):
            """
            Decorate a `register_*` method so that `_ResolvedFeature`
            arguments are expanded into one call per provider.

            The wrapped method accepts `(unit_type)` or `(unit_type, feature)`.
            Its return value is discarded.
            """
            # Imported locally so the class stays self-contained.
            from functools import wraps

            @wraps(f)
            def wrapped(self, unit_type, *args):
                assert len(args) in (0, 1)

                if args:
                    # With a feature present, a resolved unit type is pinned
                    # to its first provider only.
                    if isinstance(unit_type, _ResolvedFeature):
                        unit_type = unit_type.from_provider(
                            list(unit_type._providers.values())[0])

                    feature = args[0]
                    if isinstance(feature, _ResolvedFeature):
                        for provider in feature._providers:
                            f(self, unit_type, feature.from_provider(provider))
                    else:
                        f(self, unit_type, feature)

                else:
                    if isinstance(unit_type, _ResolvedFeature):
                        for provider in unit_type._providers:
                            f(self, unit_type.from_provider(provider))
                    else:
                        f(self, unit_type)

            return wrapped

        @_handled_resolved_features
        def register_identifier(self, unit_type):
            """Record `unit_type` in the `identifiers` store."""
            if isinstance(unit_type, _ResolvedFeature):
                for provider in unit_type._providers:
                    provided = unit_type.from_provider(provider)
                    self._append(self.identifiers, [provided], provided)
            else:
                self._append(self.identifiers, [unit_type], unit_type)

        @_handled_resolved_features
        def register_foreign_key(self, unit_type, foreign_key):
            """
            Record `foreign_key` for `unit_type`.

            A unique unit type stores it as a foreign key; otherwise a unique
            key is stored as a reverse foreign key. When neither is unique
            nothing is recorded.
            """
            if unit_type.is_unique:
                self._append(self.foreign_keys, [unit_type, foreign_key],
                             foreign_key)
            elif foreign_key.is_unique:
                self._append(self.reverse_foreign_keys,
                             [unit_type, foreign_key], foreign_key)

        @_handled_resolved_features
        def register_dimension(self, unit_type, dimension):
            """Record `dimension` for `unit_type`."""
            self._append(self.dimensions, [unit_type, dimension], dimension)

        @_handled_resolved_features
        def register_measure(self, unit_type, measure):
            """Record `measure` for `unit_type`."""
            self._append(self.measures, [unit_type, measure], measure)

        @staticmethod
        def _extract(store, keys):
            """
            Walk `store` along `keys` and return the list found there, or an
            empty list as soon as a key is missing.
            """
            for key in keys:
                if key not in store:
                    return []
                store = store[key]
            assert isinstance(store, list)
            return store

        @staticmethod
        def _append(store, keys, value):
            """
            Append `value` to the list found at `keys` inside `store`,
            creating intermediate dicts and the final list as needed.

            Raises:
                RuntimeError: If the list already holds entries and `value`
                    or any existing entry is not `shared`.
            """
            last_index = len(keys) - 1
            for i, key in enumerate(keys):
                if key not in store:
                    # The final level holds the list of features; every level
                    # above it is a dict.
                    store[key] = [] if i == last_index else {}
                store = store[key]
            assert isinstance(store, list)
            if store and not (value.shared and all(d.shared for d in store)):
                raise RuntimeError(
                    "Attempted to add duplicate non-shared feature '{}'.".
                    format(value))
            store.append(value)

    # Initialisation methods

    def __init__(self, name=None):
        """Create an empty registry with its own stats registry and cache."""
        MeasureProvider.__init__(self, name)
        self._providers = SequenceMap()
        self._stats_registry = StatsRegistry(fallback=global_stats_registry)
        self._cache = MetaMeasureProvider.GraphCache()

    # MeasureProvider registration

    @property
    def providers(self):
        """A SequenceMap of all of the providers hosted by this registry."""
        return self._providers

    def register(self, provider):
        """
        This method atomically registers a provider, and extends the graph to
        include it. Once registered, its features will be immediately available
        to all evaluations.

        Returns:
            MetaMeasureProvider: This registry, to allow chaining.

        Raises:
            ValueError: If a provider with the same name is already
                registered.
        """
        if provider.name in self._providers:
            raise ValueError(
                "A MeasureProvider named '{}' has already been registered.".
                format(provider.name))

        # Extend a copy of the graph first: if registration fails part-way,
        # neither the provider map nor the live cache has been touched.
        cache = self._cache.copy()
        cache.register(provider)

        # Committing provider and cache
        self._providers[provider.name] = provider
        self._cache = cache

        return self

    def register_from_yaml(self, path_or_yaml):
        """
        Register providers described in YAML.

        If `path_or_yaml` contains a newline or is not a directory, it is
        passed to `MeasureProvider.from_yaml` and the result is registered.
        Otherwise the directory is walked recursively and every file ending in
        '.yml' is loaded and registered.

        Returns:
            MetaMeasureProvider: This registry.
        """
        if '\n' in path_or_yaml or not os.path.isdir(
                os.path.expanduser(path_or_yaml)):
            return self.register(MeasureProvider.from_yaml(path_or_yaml))

        for dirpath, _dirnames, filenames in os.walk(
                os.path.expanduser(path_or_yaml)):
            for filename in filenames:
                if not filename.endswith('.yml'):
                    continue
                try:
                    provider = MeasureProvider.from_yaml(
                        os.path.join(dirpath, filename))
                    self.register(provider)
                except AssertionError:
                    # Deliberate best effort: files that trip an assertion
                    # while loading or registering are skipped.
                    # NOTE(review): presumably these are YAML files that are
                    # not provider specs - confirm.
                    pass
        # Mirror the single-file branch so both forms can be chained.
        return self

    def unregister(self, provider):
        """
        Remove a nominated provider from this registry.

        Args:
            provider (MeasureProvider, str): The provider to be removed.

        Returns:
            MeasureProvider: The removed provider.
        """
        provider = self._providers.pop(provider)
        self._cache_refresh()
        return provider

    def _cache_refresh(self):
        """Rebuild the graph cache from the currently hosted providers."""
        # Build into a local cache and commit at the end, so a failure while
        # registering does not leave a half-built graph in place.
        cache = MetaMeasureProvider.GraphCache()
        for provider in self._providers.values():
            cache.register(provider)
        self._cache = cache

    # Transform registration
    def register_transform(self, transform, name=None, backend=None):
        """Register a transform with this registry's stats registry."""
        return self._stats_registry.transforms.register(transform=transform,
                                                        name=name,
                                                        backend=backend)

    def register_agg(self, agg, name=None, backend=None):
        """Register an aggregation with this registry's stats registry."""
        return self._stats_registry.aggregations.register(agg=agg,
                                                          name=name,
                                                          backend=backend)

    @property
    def identifiers(self):
        """A SequenceMap with one resolved identifier per cached unit type."""
        return SequenceMap(
            self.identifier_for_unit(ut)
            for ut in self._cache.identifiers.keys())

    # MeasureEvaluator methods
    def identifier_for_unit(self, unit_type):
        """
        Return a `_ResolvedFeature` of kind 'identifier' for `unit_type`,
        listing the providers of every cached identifier stored under it.

        `unit_type` may be a name or an object with a `name` attribute; it
        must be present in the identifier cache.
        """
        return _ResolvedFeature(
            name=unit_type if isinstance(unit_type, str) else unit_type.name,
            providers=[d.provider for d in self._cache.identifiers[unit_type]],
            kind='identifier')

    def _features_lookup(self, unit_type, kind, attr_filter=None):
        """
        Collect the features of one kind that are available for a unit type.

        Parameters:
            unit_type: The unit type of interest.
            kind (str): One of 'foreign_key', 'reverse_foreign_key',
                'dimension' or 'measure'.
            attr_filter (callable, None): Optional predicate; features for
                which it returns a falsy value are skipped.

        Returns:
            SequenceMap: One `_ResolvedFeature` per distinct feature found
                under any cached unit type that matches `unit_type`.
        """
        assert kind in ('foreign_key', 'reverse_foreign_key', 'dimension',
                        'measure')

        unit_type = self.identifier_for_unit(unit_type)
        feature_source = getattr(self._cache, kind + 's')

        features = SequenceMap()
        for avail_unit_type in feature_source:
            if avail_unit_type.matches(unit_type):
                for feature, instances in feature_source[
                        avail_unit_type].items():
                    if feature not in features and (not attr_filter
                                                    or attr_filter(feature)):
                        # A key whose name equals the cached unit type is
                        # masked with the requested unit type's name.
                        mask = None
                        if kind in ('foreign_key', 'reverse_foreign_key'
                                    ) and avail_unit_type == feature.name:
                            mask = unit_type.name
                        features.append(
                            _ResolvedFeature(
                                feature.name,
                                providers=[d.provider for d in instances],
                                unit_type=unit_type,
                                mask=mask,
                                kind=kind))
        return features

    def foreign_keys_for_unit(self, unit_type):
        """Return the foreign keys available for `unit_type`."""
        return self._features_lookup(unit_type, 'foreign_key')

    def reverse_foreign_keys_for_unit(self, unit_type):
        """Return the reverse foreign keys available for `unit_type`."""
        return self._features_lookup(unit_type, 'reverse_foreign_key')

    def dimensions_for_unit(self, unit_type, include_partitions=True):
        """
        Return the dimensions available for `unit_type`, leaving out
        partition dimensions when `include_partitions` is falsy.
        """
        return self._features_lookup(
            unit_type,
            'dimension',
            attr_filter=None
            if include_partitions else lambda feature: not feature.partition)

    def partitions_for_unit(self, unit_type):
        """Return only the partition dimensions available for `unit_type`."""
        return self._features_lookup(
            unit_type,
            'dimension',
            attr_filter=lambda feature: feature.partition)

    def measures_for_unit(self, unit_type):
        """Return the measures available for `unit_type`."""
        return self._features_lookup(unit_type, 'measure')

    def _resolve(self, unit_type, feature, role=None):
        """
        Resolve a single feature for `unit_type`.

        A string feature may be a '/'-separated path; a leading segment equal
        to the unit type's name is dropped, the last segment is the feature
        name and the segment before it (if any) names the unit type against
        which the feature is actually resolved. The remaining prefix is
        attached to the result through `as_via`.
        """
        unit_type = self.identifier_for_unit(unit_type)
        via = ''
        attrs = {}
        eff_unit_type = unit_type

        if isinstance(feature, (_ResolvedFeature, _ProvidedFeature)):
            # Work on a copy so that the incoming feature's attributes are
            # never modified by the edits below.
            attrs = dict(feature.attrs)
            del attrs['name']
            feature = feature.via_name  # Re-resolve any resolved feature, since resolved features are currently not deeply resolved

        if isinstance(feature, str):
            s = feature.split('/')
            if len(s) > 1 and s[
                    0] == unit_type.name:  # Remove reference to current unit_type
                s = s[1:]
            via_suffix = '/'.join(s[:-1])
            feature = s[-1]
            if via_suffix:
                eff_unit_type = self.identifier_for_unit(s[-2])
                via = via_suffix
            attrs['unit_type'] = unit_type

        return MeasureProvider._resolve(
            self, eff_unit_type, feature,
            role=role)._with_attrs(**attrs).as_via(via)

    def _find_primary_key_for_unit_type(self, unit_type):
        """
        Return the cached identifier with the longest name that matches
        `unit_type` and has at least one primary instance.

        Raises:
            RuntimeError: If no such identifier exists.
        """
        for identifier in sorted(self._cache.identifiers,
                                 key=lambda x: len(x.name),
                                 reverse=True):
            if identifier.matches(unit_type) and any(
                    i.is_primary for i in self._cache.identifiers[identifier]):
                return identifier
        raise RuntimeError(
            "No primary key exists for unit_type `{}`.".format(unit_type))

    def _find_optimal_provision(self,
                                unit_type,
                                measures,
                                dimensions,
                                require_primary=True):
        """
        This method takes a set of measures and dimensions for a given unit_type,
        and generates a somewhat optimised sequence of `Provision` instances,
        which indicate the MeasureProvider instance from which measures and
        dimensions should be extracted. This is primarily useful for the
        generation of an `EvaluationStrategy`.

        Args:
            unit_type (str, _StatisticalUnitIdentifier): The statistical unit
                type for which indicated measures and dimensions should be
                extracted.
            measures (list<str,_Measure>): A set of measures to be extracted.
            dimensions (list<str, _Dimension>): A set of dimensions to be
                extracted.
            require_primary (bool): Whether to require the first `Provision` to
                be from a `MeasureProvider` with `unit_type` as a primary
                identifier.

        Returns:
            list<Provision>: A list of `Provision` instances which optimally
                supply the requested measures and dimensions.

        Raises:
            ValueError: If a primary provider is required but none is found.
            RuntimeError: If a round makes no progress on the outstanding
                measures and dimensions.
        """

        # TODO: Handle relation case, where ...

        # [Provision(provider, measures, dimensions), ...]
        unit_type = self.identifier_for_unit(unit_type)

        def resolve_all(features, role):
            # Resolve each feature once and key the result by itself.
            resolved = {}
            for feature in features:
                item = self.resolve(unit_type, feature, role=role)
                resolved[item] = item
            return resolved

        measures = resolve_all(measures, 'measure')
        dimensions = resolve_all(dimensions, 'dimension')

        def get_next_provider(unit_type, measures, dimensions, primary=False):
            # Rank providers by how many outstanding features they can supply.
            provider_count = Counter()
            provider_count.update(provider for measure in measures.values()
                                  for provider in measure.providers.values())
            provider_count.update(provider
                                  for dimension in dimensions.values()
                                  for provider in dimension.providers.values())

            provider = None
            if primary:
                primary_unit_type = self._find_primary_key_for_unit_type(
                    unit_type)
                # Try to extract primary provider from used providers, or locate
                # one in the unit_type registry.

                for p, _ in provider_count.most_common() + [
                    (ut.provider, 0) for ut in self._cache.identifiers[
                        primary_unit_type.name] if ut.is_primary
                ]:
                    identifier = p.identifiers.get(primary_unit_type)
                    if identifier and identifier.is_primary:
                        provider = p
                        break
                if provider is None:
                    raise ValueError("No primary key for {}.".format(
                        unit_type.name))
            else:
                provider = provider_count.most_common(1)[0][0]

            return provider

        provisions = []
        dimension_count = len(measures) + len(dimensions)

        while dimension_count > 0:
            # Only the first provision may be forced onto a primary provider.
            p = get_next_provider(unit_type,
                                  measures,
                                  dimensions,
                                  primary=bool(require_primary)
                                  and len(provisions) == 0)
            join_prefix = unit_type.name

            # Features are popped from the outstanding dicts as they are
            # assigned, so iterate over copies.
            provisions.append(
                Provision(
                    p,
                    join_prefix,
                    measures=[
                        measures.pop(measure).from_provider(p)
                        for measure in measures.copy()
                        if measure in p.measures_for_unit(unit_type)
                    ],
                    dimensions=[
                        dimensions.pop(dimension).from_provider(p)
                        for dimension in dimensions.copy()
                        if dimension in p.dimensions_for_unit(unit_type)
                        or dimension in p.foreign_keys_for_unit(unit_type)
                        or dimension in p.measures_for_unit(unit_type)
                    ]  # TODO: Use p.resolve?
                ))
            # A round without progress is only tolerated for the initial
            # primary provision, which may legitimately supply nothing.
            if len(measures) + len(dimensions) == dimension_count and not (
                    require_primary is True and len(provisions) == 1):
                raise RuntimeError(
                    "Could not provide provisions for: measures={}, dimensions={}. This is a bug."
                    .format(list(measures), list(dimensions)))
            dimension_count = len(measures) + len(dimensions)

        return provisions

    def evaluate(self,
                 unit_type,
                 measures=None,
                 segment_by=None,
                 where=None,
                 joins=None,
                 stats=True,
                 covariates=False,
                 context=None,
                 stats_registry=None,
                 **opts):
        """
        Build an evaluation strategy for the request and execute it.

        `joins` and `stats_registry` are accepted but are not currently
        forwarded to the strategy.
        """
        strategy = self.get_strategy(unit_type,
                                     measures=measures,
                                     segment_by=segment_by,
                                     where=where,
                                     context=context)
        return strategy.execute(stats=stats,
                                covariates=covariates,
                                context=context,
                                **opts)

    def get_ir(self,
               unit_type,
               measures=None,
               segment_by=None,
               where=None,
               joins=None,
               stats=True,
               covariates=False,
               context=None,
               stats_registry=None,
               **opts):
        """
        Build an evaluation strategy for the request and execute it with
        `ir_only=True`.

        `joins` and `stats_registry` are accepted but are not currently
        forwarded to the strategy.
        """
        strategy = self.get_strategy(unit_type,
                                     measures=measures,
                                     segment_by=segment_by,
                                     where=where,
                                     context=context)
        return strategy.execute(stats=stats,
                                covariates=covariates,
                                ir_only=True,
                                context=context,
                                **opts)

    def get_strategy(self,
                     unit_type,
                     measures=None,
                     segment_by=None,
                     where=None,
                     context=None):
        """
        Return the `EvaluationStrategy` built by
        `EvaluationStrategy.from_spec` for the request. `context` is accepted
        but not yet used.
        """
        # TODO: incorporate context into strategy evaluation
        # TODO: Add support for joins to meta measure provider
        # TODO: Add support for stats_registry
        return EvaluationStrategy.from_spec(self,
                                            unit_type,
                                            measures=measures,
                                            segment_by=segment_by,
                                            where=where)
Пример #12
0
 def __init__(self, name=None):
     """
     Initialise the base provider, then attach an empty provider map, a
     stats registry that falls back to the global one, and an empty graph
     cache.
     """
     MeasureProvider.__init__(self, name)
     self._cache = MetaMeasureProvider.GraphCache()
     self._stats_registry = StatsRegistry(fallback=global_stats_registry)
     self._providers = SequenceMap()
Пример #13
0
 def reverse_foreign_keys_for_unit(self, unit_type):
     """Return an empty `SequenceMap`, whatever `unit_type` is given."""
     no_reverse_keys = SequenceMap()
     return no_reverse_keys
Пример #14
0
class EvaluationStrategy(object):

    class Type(Enum):
        """Kinds of evaluation strategy, as reported by `strategy_type`."""
        # Reported when the provider's identifier for the unit type has a
        # truthy `is_unique`.
        REGULAR = 1
        # Reported when that identifier's `is_unique` is falsy.
        UNIT_REBASE = 2

    @classmethod
    def from_spec(cls, registry, unit_type, measures=None, segment_by=None, where=None, **opts):
        """
        Build a tree of `EvaluationStrategy` instances for the requested
        measures, segmentation and constraints.

        Parameters:
            registry: Object used to resolve features; this method calls its
                `identifier_for_unit`, `resolve`,
                `reverse_foreign_keys_for_unit` and `_find_optimal_provision`.
            unit_type: Unit type specification; normalised via
                `registry.identifier_for_unit`.
            measures: A single measure or a list of measures (None means none).
            segment_by: A single dimension or a list of dimensions (None means
                none).
            where: Constraint specification, passed to `Constraint.from_spec`.
            **opts: Only forwarded to the recursive `from_spec` calls made for
                foreign unit types.

        Returns:
            The strategy built from the first provision, with the strategies
            for the remaining provisions and for foreign unit types attached
            through `add_join`.

        Raises:
            RuntimeError: If a strategy for a foreign unit type cannot be
                joined into any of the strategies for the current unit type.
        """

        # Step 0: Resolve applicable measures and dimensions
        unit_type = registry.identifier_for_unit(unit_type)
        measures = [] if measures is None else measures
        segment_by = [] if segment_by is None else segment_by

        # Allow a single (non-list) measure / dimension to be passed directly.
        if not isinstance(measures, list):
            measures = [measures]
        if not isinstance(segment_by, list):
            segment_by = [segment_by]

        measures = [
            registry.resolve(unit_type, measure, role='measure') for measure in measures
        ]

        segment_by = [
            registry.resolve(unit_type, dimension, role='dimension') for dimension in segment_by
        ]

        # Dimensions referenced by constraints scoped to this unit type that
        # were not explicitly requested are resolved and flagged as implicit.
        where = Constraint.from_spec(where)
        where_dimensions = [
            (
                registry.resolve(unit_type, dimension, role='dimension').as_implicit
            )
            for dimension in where.scoped_for_unit_type(unit_type).dimensions
            if dimension not in segment_by
        ]

        # Step 1: Collect measures and dimensions into groups based on current unit_type
        # and next unit_type

        current_evaluation = FeatureBundle(unit_type=unit_type, dimensions=[], measures=[])
        next_evaluations = {}

        def collect_dimensions(dimensions, kind='measures', for_constraint=False):
            """
            Sort features into the current bundle or a per-next-unit bundle.

            `kind` selects the bundle field ('measures' or 'dimensions') that
            features are appended to. `for_constraint` is accepted but not
            used in this body.
            """
            # NOTE(review): appending through `_asdict()[kind]` only has an
            # effect if `_asdict()` exposes the bundle's own list objects
            # (as a namedtuple would) - confirm against FeatureBundle.
            for dimension in dimensions:
                if not dimension.via:
                    current_evaluation._asdict()[kind].append(dimension)
                elif (  # Handle reverse foreign key joins
                    dimension.next_unit_type in registry.reverse_foreign_keys_for_unit(unit_type)
                ):
                    next_unit_type = registry.resolve(unit_type, dimension.next_unit_type, role='reverse_foreign_key')
                    if next_unit_type not in next_evaluations:
                        # The bundle records the *current* unit type here; Step 3
                        # detects reverse joins by the bundle's unit type
                        # differing from its key.
                        next_evaluations[next_unit_type] = FeatureBundle(unit_type=unit_type, dimensions=[], measures=[])
                    next_evaluations[next_unit_type]._asdict()[kind].append(dimension.via_next)
                else:
                    next_unit_type = registry.resolve(unit_type, dimension.next_unit_type, role='foreign_key')
                    if next_unit_type not in next_evaluations:
                        next_evaluations[next_unit_type] = FeatureBundle(unit_type=next_unit_type, dimensions=[], measures=[])
                    next_evaluations[next_unit_type]._asdict()[kind].append(dimension.via_next)

        collect_dimensions(measures, kind='measures')
        collect_dimensions(segment_by, kind='dimensions')
        collect_dimensions(where_dimensions, kind='dimensions', for_constraint=True)

        # Add required dimension for joining in next unit_types
        for dimension_bundle in next_evaluations.values():
            fk = registry.resolve(unit_type, dimension_bundle.unit_type, role='foreign_key')
            if fk not in current_evaluation.dimensions:
                current_evaluation.dimensions.append(fk.as_private)

        # Step 2: Create optimal joins for current unit_type

        provisions = registry._find_optimal_provision(
            unit_type=unit_type,
            measures=current_evaluation.measures,
            dimensions=current_evaluation.dimensions
        )

        # One strategy per provision; dimensions needed only by the provider's
        # generic constraints are added to its segmentation as private.
        evaluations = []
        for provision in provisions:
            generic_constraints = where.generic_for_provider(provision.provider)
            generic_constraint_dimensions = [
                provision.provider.resolve(unit_type, dimension).as_private
                for dimension in generic_constraints.dimensions
                if not provision.dimensions or dimension not in provision.dimensions
            ]
            evaluations.append(
                cls(
                    registry=registry,
                    provider=provision.provider,
                    unit_type=unit_type,
                    measures=provision.measures,
                    segment_by=provision.dimensions + generic_constraint_dimensions,
                    where=generic_constraints,
                    join_prefix=provision.join_prefix
                )
            )

        # Step 3: For each next unit_type, recurse problem and join into above query

        for foreign_key, dim_bundle in next_evaluations.items():
            foreign_strategy = cls.from_spec(registry=registry, unit_type=foreign_key,
                                             measures=dim_bundle.measures, segment_by=dim_bundle.dimensions,
                                             where=where.via_next(foreign_key.name), **opts)

            if foreign_key != dim_bundle.unit_type:  # Reverse foreign key join
                foreign_key = dim_bundle.unit_type
                foreign_strategy.unit_type = dim_bundle.unit_type

            # NOTE(review): the `break` below only leaves the inner loop, so the
            # same foreign strategy is passed to `add_join` for every
            # sub-strategy that exposes a matching identifier - confirm that
            # joining it more than once is intended.
            added = False
            for sub_strategy in evaluations:
                for dimension in sub_strategy.segment_by:
                    if isinstance(dimension, _StatisticalUnitIdentifier) and dimension.matches(foreign_key):
                        sub_strategy.add_join(foreign_key, foreign_strategy)
                        added = True
                        break
            if not added:
                raise RuntimeError("Could not add foreign strategy: {}".format(foreign_strategy))

        # The first strategy becomes the root; the others are joined into it on
        # the current unit type.
        # NOTE(review): raises IndexError if no provisions were returned.
        strategy = evaluations[0]
        for sub_strategy in evaluations[1:]:
            strategy.add_join(unit_type, sub_strategy)

        strategy.where = And.from_operands(strategy.where, where.scoped_for_unit_type(unit_type).scoped_applicable)

        # Step 4: Mark any resolved where dependencies as private, unless otherwise
        # requested in `segment_by`

        for dimension in strategy.segment_by:
            if dimension.implicit and dimension in where.scoped_for_unit_type(unit_type).dimensions:
                strategy.segment_by[dimension] = strategy.segment_by[dimension].as_private

        # Step 5: Return EvaluationStrategy, and profit.

        return strategy

    def __init__(self, registry, provider, unit_type, measures, segment_by=None,
                 where=None, join_on_left=None, join_on_right=None,
                 join_prefix=None, joins=None):
        """
        Record the provider, unit type, requested features and join parameters
        for a single evaluation step.

        Falsy `measures`, `segment_by` and `joins` are replaced by empty
        containers. A falsy `join_on_right` defaults to the name of the
        provider's identifier for `unit_type`, and a falsy `join_prefix`
        defaults to `unit_type.name`.
        """
        self.registry = registry
        self.provider = provider

        # Statistical unit used for evaluation
        self.unit_type = unit_type

        # Anticipated measures, segmentations and constraints
        self.measures = SequenceMap(measures if measures else [])
        self.segment_by = SequenceMap(segment_by if segment_by else [])
        self.where = where

        # Join parameters
        self.is_joined = False
        self.join_is_compatible = True
        self.join_on_left = join_on_left
        if not join_on_right:
            # `matched_unit_type` reads `self.provider` and `self.unit_type`,
            # both of which are already set above.
            join_on_right = [self.matched_unit_type.name]
        self.join_on_right = join_on_right
        self.joins = joins if joins else []
        if not join_prefix:
            join_prefix = self.unit_type.name
        self.join_prefix = join_prefix

    def _check_constraints(self, prefix=None, raise_on_unconstrained=True):
        """
        This method checks whether dimensions that require constraints have been
        constrained.
        """
        unconstrained = []
        constrained_dimensions = self.where.dimensions
        constrained_dimensions.extend(self.join_on_right)

        for dimension in self.provider.dimensions_for_unit(self.unit_type):
            if dimension.requires_constraint and dimension not in constrained_dimensions:
                unconstrained.append('{}/{}'.format(prefix, dimension.name) if prefix else dimension.name)

        for join in self.joins:
            unconstrained.extend(join._check_constraints(prefix='{}/{}'.format(prefix, join.unit_type.name) if prefix else join.unit_type.name, raise_on_unconstrained=False))

        if raise_on_unconstrained and len(unconstrained) > 0:
            raise RuntimeError("The following dimensions require and lack constraints: {}.".format(unconstrained))

        return unconstrained

    @property
    def matched_unit_type(self):
        return self.provider.identifier_for_unit(self.unit_type)

    @property
    def strategy_type(self):
        if not self.matched_unit_type.is_unique:
            return self.Type.UNIT_REBASE
        else:
            return self.Type.REGULAR

    @property
    def joins_all_compatible(self):
        for join in self.joins:
            if (
                not self.provider.is_compatible_with(join.provider)
                or not join.joins_all_compatible
            ):
                return False
        return True

    def __repr__(self):
        class StrategyEncoder(json.JSONEncoder):
            def default(self, o):
                if isinstance(o, EvaluationStrategy):
                    d = OrderedDict([
                        ('provider', o.provider),
                        ('unit_type', o.unit_type)
                    ])
                    d['strategy_type'] = o.strategy_type
                    if o.measures:
                        d['measures'] = o.measures
                    if o.segment_by:
                        d['segment_by'] = o.segment_by
                    if o.where:
                        d['where'] = o.where
                    if o.is_joined:
                        d['join_on_left'] = o.join_on_left
                        d['join_on_right'] = o.join_on_right
                        d['join_type'] = o.join_type
                        if o.join_prefix != o.unit_type.name:
                            d['join_prefix'] = o.join_prefix
                        d['join_is_compatible'] = o.join_is_compatible
                    if o.joins:
                        d['joins'] = o.joins
                        d['joins_all_compatible'] = o.joins_all_compatible
                    return d
                return o.__repr__()
        return 'EvaluationStrategy(' + json.dumps(self, indent=4, cls=StrategyEncoder, ensure_ascii=False) + ')'

    def add_join(self, unit_type, strategy):
        # TODO: Make atomic
        assert isinstance(strategy, EvaluationStrategy)

        # Add primary join key if missing and set join
        self_unit_type = self.provider.identifier_for_unit(unit_type.name).with_mask(unit_type.name)
        join_unit_type = strategy.provider.identifier_for_unit(unit_type.name)
        if self_unit_type not in self.segment_by:
            self.segment_by.prepend(self_unit_type.as_private)
        if join_unit_type not in strategy.segment_by:
            strategy.segment_by.prepend(join_unit_type)
        else:
            strategy.segment_by[join_unit_type].private = False

        strategy.join_on_left = [self_unit_type.fieldname(role='dimension')]
        strategy.join_on_right = [join_unit_type.fieldname(role='dimension')]

        # Add common partitions to join keys
        common_partitions = list(
            set(self.provider.partitions_for_unit(self_unit_type.fieldname(role='dimension')))
            .intersection(strategy.provider.partitions_for_unit(join_unit_type.fieldname(role='dimension')))
        )

        for partition in common_partitions:
            if partition not in self.segment_by:
                self.segment_by.append(self.provider.resolve(self.unit_type, partition, role='dimension').as_private)
            if partition not in strategy.segment_by:
                strategy.segment_by.append(strategy.provider.resolve(strategy.unit_type, partition, role='dimension'))
            else:
                strategy.segment_by[partition].private = False
            strategy.join_on_left.extend([p.fieldname(role='dimension') for p in common_partitions])
            strategy.join_on_right.extend([p.fieldname(role='dimension') for p in common_partitions])

        # Add measures and segmentations in parent from join
        self.measures.extend(
            (
                measure.as_external.as_via(strategy.join_prefix)
                if strategy.join_prefix != self.unit_type else
                measure.as_external
            )
            for measure in strategy.measures
            if not measure.private
        )

        self.segment_by.extend(
            (
                dimension.as_external.as_via(strategy.join_prefix)
                if strategy.join_prefix != self.unit_type else
                dimension.as_external
            )
            for dimension in strategy.segment_by
            if (
                not dimension.private
                and (
                    dimension not in strategy.join_on_right
                    or dimension.implicit
                )
            )
        )

        # Set join metadata on incoming strategy
        strategy.is_joined = True
        strategy.join_is_compatible = (
            self.provider.is_compatible_with(strategy.provider)
            and strategy.joins_all_compatible
        )
        if strategy.join_prefix == self.join_prefix:
            strategy.join_prefix = None

        self.joins.append(strategy)
        return self

    @property
    def join_type(self):
        if self.strategy_type == self.Type.UNIT_REBASE:
            return 'left'
        if len(self.where.dimensions) > 0:
            return 'inner'
        for join in self.joins:
            if join.join_type == 'inner':
                return 'inner'
        return 'left'

    def execute(self, stats=True, ir_only=False, as_join=False,
                compatible=False, **opts):

        self._check_constraints()

        # Step 1: Build joins
        stats = stats and not self.is_joined
        joins = []

        for join in self.joins:
            joins.append(join.execute(
                as_join=True,
                compatible=self.provider.is_compatible_with(join.provider),
                **opts
            ))

        # Step 2: Evaluate provider
        if as_join and compatible:
            try:
                return Join(
                    provider=self.provider,
                    unit_type=self.unit_type,
                    join_prefix=self.join_prefix,
                    left_on=self.join_on_left,
                    right_on=self.join_on_right,
                    measures=self.measures,
                    dimensions=self.segment_by,
                    object=self.provider.get_ir(
                        unit_type=self.unit_type,
                        measures=self.measures,
                        segment_by=self.segment_by,
                        where=self.where,
                        joins=joins,
                        stats_registry=self.registry._stats_registry,
                        stats=stats,
                        **opts
                    ),
                    how=self.join_type,
                    compatible=True
                )
            except NotImplementedError:
                pass

        if ir_only:
            return self.provider.get_ir(
                unit_type=self.unit_type,
                measures=self.measures,
                segment_by=self.segment_by,
                where=self.where,
                joins=joins,
                stats_registry=self.registry._stats_registry,
                stats=stats,
                **opts
            )
        else:
            evaluated = self.provider.evaluate(
                unit_type=self.unit_type,
                measures=self.measures,
                segment_by=self.segment_by,
                where=self.where,
                joins=joins,
                stats_registry=self.registry._stats_registry,
                stats=stats,
                **opts
            )

            if as_join:
                if self.join_prefix:
                    evaluated = evaluated.add_prefix('{}/'.format(self.join_prefix))
                    right_on = ['{}/{}'.format(self.join_prefix, j) for j in self.join_on_right]
                else:
                    right_on = self.join_on_right

                return Join(
                    provider=self.provider,
                    unit_type=self.unit_type,
                    join_prefix=self.join_prefix,
                    left_on=self.join_on_left,
                    right_on=right_on,
                    measures=self.measures,
                    dimensions=self.segment_by,
                    how=self.join_type,
                    object=evaluated,
                    compatible=False
                )
            return evaluated