Exemplo n.º 1
0
    def evaluate(self,
                 metric,
                 segment_by=None,
                 where=None,
                 dry_run=False,
                 ir_only=False,
                 **opts):
        """Evaluate one metric, or a list of metrics.

        Args:
            metric: Key of a metric in ``self._metrics``, or a list of such
                keys. A list is evaluated element by element with the same
                arguments and a list of results is returned.
            segment_by: A single dimension or a list of dimensions. The
                metric's required segmentation and marginal dimensions are
                appended when not already present. The caller's list is
                never modified.
            where: Optional constraint spec. When the metric declares
                required constraints, they are combined with ``where``
                using ``&``.
            dry_run: When true, return the measure evaluation strategy
                instead of evaluating the metric.
            ir_only: Forwarded to ``metric.evaluate``.
            **opts: Extra options. The ``measure_opts`` entry (a dict) is
                removed and forwarded to ``self.measures.evaluate``; the
                remaining options are forwarded to ``metric.evaluate``.

        Returns:
            The strategy when ``dry_run`` is true; otherwise a
            ``MeasureSeries`` if the metric produced a ``pd.Series`` and a
            ``MeasureDataFrame`` for anything else. A list of such values
            when ``metric`` is a list.
        """
        if isinstance(metric, list):
            # Forward *all* arguments unchanged: options must be expanded
            # (not nested under an ``opts`` key) and ``ir_only`` preserved.
            return [
                self.evaluate(m,
                              segment_by=segment_by,
                              where=where,
                              dry_run=dry_run,
                              ir_only=ir_only,
                              **opts) for m in metric
            ]

        metric = self._metrics[metric]
        if segment_by is None:
            segment_by = []
        elif isinstance(segment_by, list):
            # Work on a copy so the in-place extensions below never leak
            # into the caller's list (or into sibling metrics of a batch).
            segment_by = list(segment_by)
        else:
            segment_by = [segment_by]

        measures = metric.required_measures
        if metric.required_segmentation:
            # De-duplicate while keeping declaration order, so the resulting
            # segmentation is deterministic.
            segment_by += [
                dimension
                for dimension in dict.fromkeys(metric.required_segmentation)
                if dimension not in segment_by
            ]
        marginal_dimensions = [
            dimension
            for dimension in dict.fromkeys(metric.marginal_dimensions or [])
            if dimension not in segment_by
        ]
        segment_by += marginal_dimensions

        if metric.required_constraints:
            required_constraints = Constraint.from_spec(
                metric.required_constraints)
            if where is None:
                where = required_constraints
            else:
                where = Constraint.from_spec(where) & required_constraints

        # The measure registry is always asked for a strategy only; the
        # metric itself decides how to execute it.
        strategy = self.measures.evaluate(metric.unit_type,
                                          measures=measures,
                                          segment_by=segment_by,
                                          where=where,
                                          dry_run=True,
                                          **opts.pop('measure_opts', {}))

        if dry_run:
            return strategy

        result = metric.evaluate(strategy,
                                 marginal_dimensions,
                                 ir_only=ir_only,
                                 **opts)

        if isinstance(result, pd.Series):
            return MeasureSeries(result)
        return MeasureDataFrame(result)
Exemplo n.º 2
0
    def test_strategy_methods(self):
        """Check unit-type scoping and provider-generic constraint extraction."""
        constraint = Constraint.from_spec({'*/unit/a': 1, '*/b': 2, 'c': 3})

        # Scoping to 'unit' is expected to yield the 'a' and 'c' constraints.
        scoped_to_unit = constraint.scoped_for_unit_type('unit')
        expected_for_unit = Constraint.from_spec({'a': 1, 'c': 3})
        self.assertEqual(scoped_to_unit, expected_for_unit)

        # For an unrelated unit type only the 'c' constraint is expected.
        scoped_to_other = constraint.scoped_for_unit_type('other')
        expected_for_other = Constraint.from_spec({'c': 3})
        self.assertEqual(scoped_to_other, expected_for_other)

        # A provider offering dimension 'b' should receive the generic 'b'
        # constraint.
        provider = MeasureProvider().provides_dimension('b')
        generic_for_provider = constraint.generic_for_provider(provider)
        expected_generic = Constraint.from_spec({'b': 2})
        self.assertEqual(generic_for_provider, expected_generic)
Exemplo n.º 3
0
 def wrapped(self,
             unit_type,
             measures=None,
             segment_by=None,
             where=None,
             joins=None,
             stats_registry=None,
             stats=True,
             covariates=False,
             **opts):
     """Normalise the call arguments, then delegate to the wrapped ``f``.

     The unit type is converted with ``self.identifier_for_unit``; measures
     and dimensions are resolved through ``self.resolve`` (or default to an
     empty dict); ``where`` is parsed with ``Constraint.from_spec``; falsy
     ``joins`` / ``stats_registry`` fall back to ``[]`` and
     ``global_stats_registry``; the remaining options go through
     ``self.opts.process``.
     """
     unit_type = self.identifier_for_unit(unit_type)

     if measures is None:
         measures = {}
     else:
         measures = self.resolve(unit_type=unit_type,
                                 features=measures,
                                 role='measure')

     if segment_by is None:
         segment_by = {}
     else:
         segment_by = self.resolve(unit_type=unit_type,
                                   features=segment_by,
                                   role='dimension')

     where = Constraint.from_spec(where)

     if not joins:
         joins = []
     if not stats_registry:
         stats_registry = global_stats_registry

     opts = self.opts.process(**opts)

     return f(
         self,
         unit_type,
         measures=measures,
         segment_by=segment_by,
         where=where,
         joins=joins,
         stats_registry=stats_registry,
         stats=stats,
         covariates=covariates,
         **opts,
     )
Exemplo n.º 4
0
    def test_constraint_specs(self):
        """Exercise ``Constraint.from_spec`` across the supported spec shapes."""
        # Specs that must yield a single constraint:
        # (spec, expected field, expected value, expected relation, generic?)
        single_cases = [
            ({'a': 10}, 'a', 10, '==', False),
            ({'a': "10"}, 'a', "10", '==', False),
            ({'*/a': ('>', 10)}, 'a', 10, '>', True),
            ({'a': '>10'}, 'a', '10', '>', False),
            ({'a': {1, 2, 3}}, 'a', {1, 2, 3}, 'in', False),
        ]
        for spec, field, value, relation, is_generic in single_cases:
            constraint = Constraint.from_spec(spec=spec)
            self.assertIsInstance(constraint, Constraint)
            self.assertEqual(constraint.field, field)
            self.assertEqual(constraint.value, value)
            self.assertEqual(constraint.relation, relation)
            check_generic = self.assertTrue if is_generic else self.assertFalse
            check_generic(constraint.generic)

        # Specs that must yield a compound constraint:
        # (spec, expected compound type, expected number of operands)
        compound_cases = [
            ({'*/a': [1, 2, "10"]}, And, 3),
            ({'a': {('<', 10), ('>', 11)}}, Or, 2),
            ([{'a': 10, 'field': 11}], And, 2),
            (({'a': 10}, {'field': 11}), Or, 2),
        ]
        for spec, compound_type, operand_count in compound_cases:
            constraint = Constraint.from_spec(spec=spec)
            self.assertIsInstance(constraint, compound_type)
            self.assertEqual(len(constraint.operands), operand_count)
Exemplo n.º 5
0
    def test_strategy_methods(self):
        """Check unit-type scoping and provider-generic constraint extraction."""
        constraint = Constraint.from_spec({'*/unit/a': 1, '*/b': 2, 'c': 3})

        # Scoping to 'unit' is expected to yield the 'a' and 'c' constraints.
        scoped_to_unit = constraint.scoped_for_unit_type('unit')
        expected_for_unit = Constraint.from_spec({'a': 1, 'c': 3})
        self.assertEqual(scoped_to_unit, expected_for_unit)

        # For an unrelated unit type only the 'c' constraint is expected.
        scoped_to_other = constraint.scoped_for_unit_type('other')
        expected_for_other = Constraint.from_spec({'c': 3})
        self.assertEqual(scoped_to_other, expected_for_other)

        # None of the operands of the scoped constraint may report a
        # generic part.
        for operand in constraint.scoped_for_unit_type('unit').operands:
            self.assertFalse(operand.has_generic)

        # A provider with dimension 'b' should receive the generic 'b'
        # constraint.
        provider = MutableMeasureProvider().add_dimension('b')
        generic_for_provider = constraint.generic_for_provider(provider)
        expected_generic = Constraint.from_spec({'b': 2})
        self.assertEqual(generic_for_provider, expected_generic)
Exemplo n.º 6
0
    def _get_strategy_for_metric(self, metric, segment_by, where):
        measures = metric.required_measures
        if metric.required_segmentation:
            segment_by = segment_by + list(set(metric.required_segmentation).difference(segment_by))
        required_marginal_segmentation = list(set(metric.required_marginal_segmentation or []).difference(segment_by))
        segment_by = segment_by + required_marginal_segmentation

        if metric.required_constraints:
            required_constraints = Constraint.from_spec(metric.required_constraints)
            if where is None:
                where = required_constraints
            else:
                where = Constraint.from_spec(where) & required_constraints

        return self.measures.get_strategy(
            metric.unit_type,
            measures=measures,
            segment_by=segment_by,
            where=where
        )
Exemplo n.º 7
0
    def test_constraint_arithmetic(self):
        """Check the result types and algebraic laws of ``&`` and ``|``."""
        single = Constraint.from_spec({'a': 10})
        alternatives = Constraint.from_spec(({'b': 20}, {'c': 30}))
        pair = Constraint.from_spec({'d': 40, 'e': 50})

        conjunction = single & alternatives
        self.assertIsInstance(conjunction, And)

        disjunction = single | alternatives
        self.assertIsInstance(disjunction, Or)

        # `&` binds tighter than `|`, so the outermost operation is the Or.
        mixed = single | (alternatives & pair)
        self.assertIsInstance(mixed, Or)

        # Commutativity
        self.assertEqual(single & alternatives, alternatives & single)
        self.assertEqual(single | alternatives, alternatives | single)

        # Associativity
        left_grouped_and = (single & alternatives) & pair
        right_grouped_and = single & (alternatives & pair)
        self.assertEqual(left_grouped_and, right_grouped_and)

        left_grouped_or = (single | alternatives) | pair
        right_grouped_or = single | (alternatives | pair)
        self.assertEqual(left_grouped_or, right_grouped_or)
Exemplo n.º 8
0
    def test_generic_scoped(self):
        """Check the scoped/generic split of constraints built from specs."""
        # A plain field name yields a purely scoped constraint.
        scoped_only = Constraint.from_spec({'a': 10})
        self.assertTrue(scoped_only.has_scoped)
        self.assertFalse(scoped_only.has_generic)
        self.assertEqual(scoped_only.scoped, scoped_only)

        # A '*/' prefix yields a purely generic constraint.
        generic_only = Constraint.from_spec({'*/a': 10})
        self.assertFalse(generic_only.has_scoped)
        self.assertTrue(generic_only.has_generic)
        self.assertEqual(generic_only.generic, generic_only)

        # Mixing both in one dict spec keeps the two parts separable.
        mixed = Constraint.from_spec({'a': 10, '*/b': 20})
        self.assertTrue(mixed.has_scoped)
        self.assertTrue(mixed.has_generic)
        self.assertEqual(mixed.scoped, Constraint.from_spec({'a': 10}))
        self.assertEqual(mixed.generic, Constraint.from_spec({'b': 20}))

        # A tuple spec combining a generic and a scoped constraint must be
        # rejected.
        with self.assertRaises(ValueError):
            Constraint.from_spec(({'*/b': 20}, {'c': 30}))
Exemplo n.º 9
0
 def wrapped(self,
             unit_type,
             measures=None,
             segment_by=None,
             where=None,
             joins=None,
             **opts):
     """Normalise the call arguments, then delegate to the wrapped ``f``.

     The unit type is converted with ``self.identifier_for_unit``; measures
     and dimensions are resolved through ``self.resolve`` (or default to an
     empty dict); ``where`` is parsed with ``Constraint.from_spec``; falsy
     ``joins`` becomes ``[]``. Extra options are passed through untouched.
     """
     unit_type = self.identifier_for_unit(unit_type)

     if measures is None:
         measures = {}
     else:
         measures = self.resolve(measures, kind='measure')

     if segment_by is None:
         segment_by = {}
     else:
         segment_by = self.resolve(segment_by, kind='dimension')

     where = Constraint.from_spec(where)

     if not joins:
         joins = []

     return f(
         self,
         unit_type,
         measures=measures,
         segment_by=segment_by,
         where=where,
         joins=joins,
         **opts,
     )
Exemplo n.º 10
0
        def wrapped(self,
                    unit_type,
                    measures=None,
                    segment_by=None,
                    where=None,
                    joins=None,
                    stats=True,
                    covariates=False,
                    context=None,
                    stats_registry=None,
                    **opts):
            """Normalise the call arguments, then delegate to the wrapped ``f``.

            A lone string or ``_ProvidedFeature`` given as ``measures`` or
            ``segment_by`` is wrapped in a list before being resolved with
            ``self.resolve``; ``None`` becomes an empty ``SequenceMap``.
            ``where`` is parsed with ``Constraint.from_spec`` and falsy
            ``joins`` / ``stats_registry`` / ``context`` fall back to
            ``[]``, ``global_stats_registry`` and ``{}`` respectively.
            """
            single_feature_types = (str, _ProvidedFeature)

            unit_type = self.identifier_for_unit(unit_type)

            if isinstance(measures, single_feature_types):
                measures = [measures]
            if measures is None:
                measures = SequenceMap()
            else:
                measures = self.resolve(unit_type=unit_type,
                                        features=measures,
                                        role='measure')

            if isinstance(segment_by, single_feature_types):
                segment_by = [segment_by]
            if segment_by is None:
                segment_by = SequenceMap()
            else:
                segment_by = self.resolve(unit_type=unit_type,
                                          features=segment_by,
                                          role='dimension')

            where = Constraint.from_spec(where)

            if not joins:
                joins = []
            if not stats_registry:
                stats_registry = global_stats_registry
            if not context:
                context = {}

            # NOTE(review): option processing is currently disabled here, so
            # ``opts`` reaches ``f`` unprocessed.
            # opts = self.opts.process(**opts)
            return f(
                self,
                unit_type,
                measures=measures,
                segment_by=segment_by,
                where=where,
                joins=joins,
                stats=stats,
                covariates=covariates,
                context=context,
                stats_registry=stats_registry,
                **opts,
            )
Exemplo n.º 11
0
    def from_spec(cls,
                  registry,
                  unit_type,
                  measures=None,
                  segment_by=None,
                  where=None,
                  **opts):
        """Build an evaluation strategy for ``unit_type`` from a declarative spec.

        The requested measures, segmentation dimensions and constraint
        dimensions are split into those that belong to ``unit_type`` itself
        and those reached through another unit type. The former are covered
        by the provisions returned from ``registry._find_optimal_provision``;
        the latter are handled by calling ``from_spec`` recursively for the
        next unit type and joining the result into the strategies built here.

        Args:
            registry: Object used to resolve identifiers, measures,
                dimensions, foreign keys and provisions.
            unit_type: Unit type to evaluate for; resolved through
                ``registry._resolve_identifier``.
            measures: Iterable of measures to resolve (``None`` means none).
            segment_by: Iterable of dimensions to resolve (``None`` means
                none).
            where: Constraint spec, parsed with ``Constraint.from_spec``.
            **opts: Only forwarded to the recursive ``from_spec`` calls.

        Returns:
            The first strategy built for the current unit type, with all
            other strategies joined into it.

        Raises:
            RuntimeError: If a strategy for a next unit type cannot be
                joined onto any strategy of the current unit type.
        """

        # Step 0: Resolve applicable measures and dimensions
        unit_type = registry._resolve_identifier(unit_type)
        measures = [] if measures is None else measures
        segment_by = [] if segment_by is None else segment_by

        measures = [
            registry._resolve_measure(unit_type, measure)
            for measure in measures
        ]

        segment_by = [
            registry._resolve_dimension(unit_type, dimension)
            for dimension in segment_by
        ]

        where = Constraint.from_spec(where)
        # Dimensions referenced by the scoped constraints but not requested
        # in `segment_by`; they are resolved and marked implicit.
        where_dimensions = [
            (registry._resolve_dimension(unit_type, dimension).as_implicit)
            for dimension in where.scoped_for_unit_type(unit_type).dimensions
            if dimension not in segment_by
        ]

        # Step 1: Collect measures and dimensions into groups based on current unit_type
        # and next unit_type

        current_evaluation = DimensionBundle(unit_type=unit_type,
                                             dimensions=[],
                                             measures=[])
        next_evaluations = {}

        # Sort each feature into the bundle of the current unit type or into
        # the bundle of the next unit type it is reached through.
        # NOTE(review): appending via `_asdict()[kind]` only works if the
        # returned mapping holds the bundle's own list objects (as with a
        # namedtuple) -- confirm against DimensionBundle's definition.
        def collect_dimensions(dimensions,
                               kind='measures',
                               for_constraint=False):
            for dimension in dimensions:
                if not dimension.via:
                    current_evaluation._asdict()[kind].append(dimension)
                elif (  # Handle reverse foreign key joins
                    (for_constraint or kind == 'measures')
                        and dimension.next_unit_type
                        in registry.reverse_foreign_keys_for_unit(unit_type)):
                    next_unit_type = registry._resolve_reverse_foreign_key(
                        unit_type, dimension.next_unit_type)
                    if next_unit_type not in next_evaluations:
                        # The bundle keeps the *current* unit type here; Step 3
                        # uses that mismatch to detect reverse foreign key joins.
                        next_evaluations[next_unit_type] = DimensionBundle(
                            unit_type=unit_type, dimensions=[], measures=[])
                    next_evaluations[next_unit_type]._asdict()[kind].append(
                        dimension.via_next)
                else:
                    next_unit_type = registry._resolve_foreign_key(
                        unit_type, dimension.next_unit_type)
                    if next_unit_type not in next_evaluations:
                        next_evaluations[next_unit_type] = DimensionBundle(
                            unit_type=next_unit_type,
                            dimensions=[],
                            measures=[])
                    next_evaluations[next_unit_type]._asdict()[kind].append(
                        dimension.via_next)

        collect_dimensions(measures, kind='measures')
        collect_dimensions(segment_by, kind='dimensions')
        collect_dimensions(where_dimensions,
                           kind='dimensions',
                           for_constraint=True)

        # Add required dimension for joining in next unit_types
        for dimension_bundle in next_evaluations.values():
            fk = registry._resolve_foreign_key(unit_type,
                                               dimension_bundle.unit_type)
            if fk not in current_evaluation.dimensions:
                current_evaluation.dimensions.append(fk.as_private)

        # Step 2: Create optimal joins for current unit_type

        provisions = registry._find_optimal_provision(
            unit_type=unit_type,
            measures=current_evaluation.measures,
            dimensions=current_evaluation.dimensions)

        evaluations = []
        for provision in provisions:
            generic_constraints = where.generic_for_provider(
                provision.provider)
            # Dimensions needed by the generic constraints that the provision
            # does not already supply; added as private dimensions.
            generic_constraint_dimensions = [
                provision.provider.resolve(dimension).as_private
                for dimension in generic_constraints.dimensions
                if not provision.dimensions
                or dimension not in provision.dimensions
            ]
            evaluations.append(
                cls(registry=registry,
                    provider=provision.provider,
                    unit_type=unit_type,
                    measures=provision.measures,
                    segment_by=provision.dimensions +
                    generic_constraint_dimensions,
                    where=generic_constraints,
                    join_prefix=provision.join_prefix))

        # Step 3: For each next unit_type, recurse problem and join into above query

        for foreign_key, dim_bundle in next_evaluations.items():
            foreign_strategy = cls.from_spec(registry=registry,
                                             unit_type=foreign_key,
                                             measures=dim_bundle.measures,
                                             segment_by=dim_bundle.dimensions,
                                             where=where.via_next(
                                                 foreign_key.name),
                                             **opts)

            if foreign_key != dim_bundle.unit_type:  # Reverse foreign key join
                foreign_key = dim_bundle.unit_type
                foreign_strategy.unit_type = dim_bundle.unit_type

            # NOTE(review): `break` only leaves the inner loop, so the outer
            # loop keeps going and the foreign strategy may be joined onto
            # more than one sub-strategy -- confirm this is intended.
            added = False
            for sub_strategy in evaluations:
                for dimension in sub_strategy.segment_by:
                    if isinstance(dimension, _StatisticalUnitIdentifier
                                  ) and dimension.matches(foreign_key):
                        sub_strategy.add_join(foreign_key, foreign_strategy)
                        added = True
                        break
            if not added:
                raise RuntimeError("Could not add foreign strategy: {}".format(
                    foreign_strategy))

        # NOTE(review): indexing assumes at least one provision was returned
        # in Step 2; an empty result raises IndexError here.
        strategy = evaluations[0]
        for sub_strategy in evaluations[1:]:
            strategy.add_join(unit_type, sub_strategy)

        strategy.where = And.from_operands(strategy.where,
                                           where.scoped_applicable)

        # Step 4: Mark any resolved where dependencies as private, unless otherwise
        # requested in `segment_by`

        # Entries are replaced in place by index; the list length never
        # changes while it is being iterated.
        for dimension in strategy.segment_by:
            if dimension.implicit and dimension in where.scoped_applicable.dimensions:
                index = strategy.segment_by.index(dimension)
                strategy.segment_by[index] = strategy.segment_by[
                    index].as_private

        # Step 5: Return EvaluationStrategy, and profit.

        return strategy
Exemplo n.º 12
0
    def test_resolvability(self):
        """Check ``resolvable`` on constraints narrowed with ``via_next('unit')``."""
        # A dict spec mixing 'unit/...' and 'type/...' fields stays resolvable.
        combined = Constraint.from_spec({
            'unit/a': 1,
            'unit/b': 2,
            'type/c': 3,
        })
        narrowed = combined.via_next('unit')
        self.assertTrue(narrowed.resolvable)

        # The same fields split across a tuple spec are not resolvable.
        alternatives = Constraint.from_spec((
            {'unit/a': 1, 'unit/b': 2},
            {'type/c': 3},
        ))
        narrowed = alternatives.via_next('unit')
        self.assertFalse(narrowed.resolvable)
Exemplo n.º 13
0
 def require_constraints(self, **constraints):
     """Add ``constraints`` to the required constraints; return ``self``.

     The keyword arguments are parsed as a constraint spec and merged into
     ``self._required_constraints`` with ``&=``. Returning ``self`` allows
     calls to be chained.
     """
     additional = Constraint.from_spec(constraints)
     self._required_constraints &= additional
     return self