Пример #1
0
    def _features_lookup(self, unit_type, kind, attr_filter=None):
        """
        Collect the cached features of a given kind that apply to a unit type.

        Parameters:
            unit_type: The unit type to look features up for; it is first
                normalised via `self.identifier_for_unit`.
            kind (str): One of 'foreign_key', 'reverse_foreign_key',
                'dimension' or 'measure'. The features are read from the
                attribute of `self._cache` named `kind + 's'`.
            attr_filter (callable, None): Optional predicate called with each
                candidate feature; features for which it returns a falsy
                value are skipped.

        Returns:
            SequenceMap: One `_ResolvedFeature` per distinct feature, in the
                order they are first encountered.
        """
        assert kind in ('foreign_key', 'reverse_foreign_key', 'dimension',
                        'measure')

        unit_type = self.identifier_for_unit(unit_type)
        feature_source = getattr(self._cache, kind + 's')
        is_key_kind = kind in ('foreign_key', 'reverse_foreign_key')

        features = SequenceMap()
        for candidate_unit in feature_source:
            if not candidate_unit.matches(unit_type):
                continue
            for feature, instances in feature_source[candidate_unit].items():
                # The first occurrence of a feature wins.
                if feature in features:
                    continue
                if attr_filter and not attr_filter(feature):
                    continue

                # Key features whose name equals the cached unit type are
                # masked with the name of the requested unit type.
                if is_key_kind and candidate_unit == feature.name:
                    mask = unit_type.name
                else:
                    mask = None

                providers = [d.provider for d in instances]
                features.append(
                    _ResolvedFeature(feature.name,
                                     providers=providers,
                                     unit_type=unit_type,
                                     mask=mask,
                                     kind=kind))
        return features
Пример #2
0
 def _get_dimensions_from_specs(self, cls, specs):
     """
     Build a SequenceMap of features from an iterable of specifications.

     Each spec is converted with `cls.from_spec(spec, provider=self)` and
     stored in the map keyed by itself. When `specs` is `None` an empty
     SequenceMap is returned.
     """
     built = SequenceMap()
     if specs is not None:
         for spec in specs:
             feature = cls.from_spec(spec, provider=self)
             built[feature] = feature
     return built
Пример #3
0
    def measures_for_unit(self, unit_type):
        """
        Return the measures available for the nominated unit type.

        The unit type is normalised via `self.identifier_for_unit`; if that
        yields `None`, `self.measures` is returned as-is. Otherwise a new
        SequenceMap is returned containing the measures for which
        `self._unit_has_measure(unit_type, measure)` is truthy.
        """
        unit_type = self.identifier_for_unit(unit_type)
        if unit_type is None:
            return self.measures

        applicable = SequenceMap()
        for candidate in self.measures:
            if not self._unit_has_measure(unit_type, candidate):
                continue
            applicable.append(candidate)
        return applicable
Пример #4
0
    def dimensions_for_unit(self, unit_type, include_partitions=True):
        """
        Return the dimensions available for the nominated unit type.

        The unit type is normalised via `self.identifier_for_unit`; if that
        yields `None`, `self.dimensions` is returned as-is (regardless of
        `include_partitions`). Otherwise a new SequenceMap is returned with
        the dimensions for which `self._unit_has_dimension` is truthy,
        skipping those with a truthy `partition` attribute when
        `include_partitions` is falsy.
        """
        unit_type = self.identifier_for_unit(unit_type)
        if unit_type is None:
            return self.dimensions

        applicable = SequenceMap()
        for candidate in self.dimensions:
            if not self._unit_has_dimension(unit_type, candidate):
                continue
            if not include_partitions and candidate.partition:
                continue
            applicable.append(candidate)
        return applicable
Пример #5
0
    def foreign_keys_for_unit(self, unit_type):
        """
        Return the foreign keys available for the nominated unit type.

        The unit type is normalised via `self.identifier_for_unit`; if that
        yields `None`, `self.identifiers` is returned as-is. Otherwise a new
        SequenceMap is returned with the identifiers for which
        `self._unit_has_foreign_key` is truthy. An identifier that compares
        equal to the unit type's name is replaced by a copy carrying the unit
        type's mask (via `with_mask`).
        """
        unit_type = self.identifier_for_unit(unit_type)
        if unit_type is None:
            return self.identifiers

        applicable = SequenceMap()
        for identifier in self.identifiers:
            if not self._unit_has_foreign_key(unit_type, identifier):
                continue
            if unit_type.name == identifier:
                identifier = identifier.with_mask(unit_type.mask)
            applicable.append(identifier)
        return applicable
Пример #6
0
    def __init__(self, registry, provider, unit_type, measures, segment_by=None,
                 where=None, join_on_left=None, join_on_right=None,
                 join_prefix=None, joins=None):
        """
        Initialise the instance from a registry, a provider, a unit type and
        the requested measures, segmentations, constraint and join options.

        Parameters:
            registry: Stored unchanged as `self.registry`.
            provider: Stored unchanged as `self.provider`.
            unit_type: Stored unchanged as `self.unit_type`; its `name` is
                used as the default `join_prefix`.
            measures: Iterable wrapped in a SequenceMap; a falsy value yields
                an empty SequenceMap.
            segment_by: Iterable wrapped in a SequenceMap; a falsy value
                yields an empty SequenceMap.
            where: Stored unchanged as `self.where`.
            join_on_left: Stored unchanged as `self.join_on_left`.
            join_on_right: Stored as `self.join_on_right`; when falsy it
                defaults to `[self.matched_unit_type.name]`.
            join_prefix: Stored as `self.join_prefix`; when falsy it defaults
                to `self.unit_type.name`.
            joins: Stored as `self.joins`; when falsy it defaults to a new
                empty list.
        """
        self.registry = registry
        self.provider = provider

        # Statistical unit used for evaluation
        self.unit_type = unit_type

        # Anticipated measures, segmentations and constraints
        self.measures = SequenceMap(measures or [])
        self.segment_by = SequenceMap(segment_by or [])
        self.where = where

        # Join parameters
        self.is_joined = False
        self.join_is_compatible = True
        self.join_on_left = join_on_left
        # NOTE(review): `matched_unit_type` is not assigned in this method; it
        # is presumably a property derived from attributes set above (e.g.
        # `provider` / `unit_type`) -- confirm before reordering assignments.
        self.join_on_right = join_on_right or [self.matched_unit_type.name]
        self.joins = joins or []
        self.join_prefix = join_prefix or self.unit_type.name
Пример #7
0
    def resolve(self, unit_type, features, role=None, with_attrs=None):
        """
        This method resolves one or more features optionally associated with a
        unit_type and a role. Note that this method is concerned about
        *functional* resolution, so if `role='dimension'` both identifiers and
        measures will be resolved, since they can be used as dimensions.

        Parameters:
            unit_type (str, None): A unit type for which the resolution should
                be done.
            features (str, tuple, dict, FeatureSpec, list, SequenceMap): A
                feature or a collection of features to resolve. Anything that
                is not a `list` or `SequenceMap` is treated as a single
                feature. Each feature may be given as:
                  - a `(name, attrs)` tuple, converted with
                    `FeatureSpec(name, **attrs)`;
                  - a `dict`, converted with `FeatureSpec(**dict)`;
                  - a `FeatureSpec`, expanded via `as_source_with_attrs`, whose
                    extra attributes are merged over `with_attrs`;
                  - any other value (e.g. a name), passed on unchanged.
            role (str, None): One of 'measure', 'dimension', 'identifier' or `None`.
            with_attrs (dict, None): Attributes to set on the returned feature.
                Note that these are *additive* to any attributes already inherited
                from feature_type (which are otherwise preserved). The mapping
                is copied per feature and never mutated.

        Returns:
            _Dimension, _Measure, _StatisticalUnitIdentifier: The resolved
                object, when a single feature was passed.
            SequenceMap: The resolved objects, when a `list` or `SequenceMap`
                was passed.

        Raises:
            ValueError: If one or more features could not be resolved. All
                features are attempted first so that the message lists every
                failure.
        """
        return_one = False

        if not isinstance(features, (list, SequenceMap)):
            return_one = True
            features = [features]

        unresolvable = []
        resolved = SequenceMap()
        for feature in features:
            try:
                attrs = with_attrs.copy() if with_attrs else {}
                if isinstance(feature, tuple):
                    feature = FeatureSpec(feature[0], **feature[1])
                if isinstance(feature, dict):
                    feature = FeatureSpec(**feature)
                if isinstance(feature, FeatureSpec):
                    feature, extra_attrs = feature.as_source_with_attrs(
                        unit_type)
                    attrs.update(extra_attrs)
                r = self._resolve(unit_type=unit_type,
                                  feature=feature,
                                  role=role)._with_attrs(**attrs)
                resolved[r] = r
            except ValueError:
                # Keep going so that every failure is reported at once below.
                unresolvable.append(feature)
        if unresolvable:
            raise ValueError(
                "Could not resolve {}(s) associated with unit_type '{}' for: '{}'"
                .format(role or 'feature', repr(unit_type),
                        "', '".join(str(item) for item in unresolvable)))

        if return_one:
            return resolved.first
        return resolved
Пример #8
0
        def wrapped(self,
                    unit_type,
                    measures=None,
                    segment_by=None,
                    where=None,
                    joins=None,
                    stats=True,
                    covariates=False,
                    context=None,
                    stats_registry=None,
                    **opts):
            """
            Normalise the evaluation arguments and delegate to the wrapped `f`.

            The unit type is normalised via `self.identifier_for_unit`;
            `measures` and `segment_by` are resolved (with roles 'measure' and
            'dimension' respectively) or replaced by an empty SequenceMap when
            `None`; `where` is converted with `Constraint.from_spec`; and falsy
            `joins`, `stats_registry` and `context` are replaced by an empty
            list, `global_stats_registry` and an empty dict respectively.
            Remaining keyword options are forwarded untouched.
            """
            unit_type = self.identifier_for_unit(unit_type)

            def _resolve_for_role(requested, role):
                # A single feature is treated as a one-element list.
                if isinstance(requested, (str, _ProvidedFeature)):
                    requested = [requested]
                if requested is None:
                    return SequenceMap()
                return self.resolve(unit_type=unit_type,
                                    features=requested,
                                    role=role)

            measures = _resolve_for_role(measures, 'measure')
            segment_by = _resolve_for_role(segment_by, 'dimension')
            where = Constraint.from_spec(where)

            if not joins:
                joins = []
            if not stats_registry:
                stats_registry = global_stats_registry
            if not context:
                context = {}

            # NOTE(review): option processing is currently disabled:
            # opts = self.opts.process(**opts)
            return f(self,
                     unit_type,
                     measures=measures,
                     segment_by=segment_by,
                     where=where,
                     joins=joins,
                     stats=stats,
                     covariates=covariates,
                     context=context,
                     stats_registry=stats_registry,
                     **opts)
Пример #9
0
    def _evaluate(self, unit_type, measures, segment_by, where, joins, stats,
                  covariates, context, stats_registry, **opts):
        """
        Evaluate measures and dimensions as expressions over `self.data`.

        A frame is assembled with one column per dimension (those in
        `segment_by`, followed by those referenced by `where` but not in
        `segment_by`) and then one column per measure. Each column is
        obtained by evaluating the feature's `expr` against `self.data`
        augmented with a constant `count` column of 1. The frame is then
        handed to `self._finalise_dataframe`.

        `joins`, `covariates`, `context` and `opts` are accepted but not used
        by this method.
        """
        assert stats_registry is not None
        assert all(not measure.external for measure in measures)
        assert all(not dimension.external for dimension in segment_by)

        rebase_agg = not unit_type.is_unique
        # Field names omit the unit type when the aggregation is rebased.
        field_unit = None if rebase_agg else unit_type

        raw_data = self.data.assign(count=1)

        constraint_only = [
            self.dimensions[dim] for dim in where.dimensions
            if dim not in segment_by
        ]
        where_dims = SequenceMap(constraint_only)

        df = pd.DataFrame()
        dimension_columns = {
            dimension.fieldname(role='dimension', unit_type=field_unit):
            raw_data.eval(dimension.expr)
            for dimension in itertools.chain(segment_by, where_dims)
        }
        df = df.assign(**dimension_columns)

        measure_columns = {
            measure.fieldname(role='measure', unit_type=field_unit):
            raw_data.eval(measure.expr)
            for measure in measures
        }
        df = df.assign(**measure_columns)

        return self._finalise_dataframe(df,
                                        unit_type=unit_type,
                                        measures=measures,
                                        segment_by=segment_by,
                                        where=where,
                                        stats_registry=stats_registry,
                                        stats=stats,
                                        rebase_agg=rebase_agg)
Пример #10
0
 def identifiers(self):
     """
     Return a SequenceMap of the identifiers for every unit type key held in
     `self._cache.identifiers`, each obtained via `self.identifier_for_unit`.
     """
     cached_unit_types = self._cache.identifiers.keys()
     return SequenceMap(
         self.identifier_for_unit(cached) for cached in cached_unit_types)
Пример #11
0
 def __init__(self, name=None):
     """
     Initialise the provider with empty internal state.

     Parameters:
         name: Passed through to `MeasureProvider.__init__`; defaults to
             `None`.
     """
     # The parent initialiser is invoked explicitly rather than via `super()`.
     MeasureProvider.__init__(self, name)
     # Starts out as an empty SequenceMap.
     self._providers = SequenceMap()
     # A fresh registry constructed with the global registry as its `fallback`.
     self._stats_registry = StatsRegistry(fallback=global_stats_registry)
     # A fresh cache; `_cache` is the attribute name the lookup helpers in
     # this listing read from (presumably the same class -- confirm).
     self._cache = MetaMeasureProvider.GraphCache()
Пример #12
0
 def reverse_foreign_keys_for_unit(self, unit_type):
     """
     Return the reverse foreign keys for the nominated unit type.

     This implementation ignores `unit_type` and always returns a new, empty
     SequenceMap.
     """
     no_reverse_keys = SequenceMap()
     return no_reverse_keys