def evaluate(self, metric, segment_by=None, where=None, dry_run=False, ir_only=False, **opts):
    """Evaluate one metric (or a list of metrics) by name.

    Args:
        metric: Key into ``self._metrics``, or a list of such keys. A list
            is evaluated element-wise with the same options and a list of
            results is returned.
        segment_by: A single dimension or a list of dimensions to segment
            by. The metric's required segmentation and marginal dimensions
            are appended when not already present. The caller's list is
            never modified.
        where: Constraint spec; combined (AND) with the metric's required
            constraints when the metric declares any.
        dry_run: When true, return the measure evaluation strategy instead
            of evaluating the metric.
        ir_only: Forwarded to ``metric.evaluate``.
        **opts: Forwarded to ``metric.evaluate``. The optional
            ``measure_opts`` entry is removed and forwarded to
            ``self.measures.evaluate`` instead.

    Returns:
        The strategy (``dry_run=True``), otherwise a ``MeasureSeries`` when
        the metric yields a ``pd.Series`` and a ``MeasureDataFrame`` for
        anything else; or a list of those when ``metric`` is a list.
    """
    if isinstance(metric, list):
        # Unpack ``opts`` (rather than passing it as a keyword literally
        # named ``opts``) and forward ``ir_only`` so that every metric is
        # evaluated with exactly the options the caller supplied.
        return [
            self.evaluate(m, segment_by=segment_by, where=where,
                          dry_run=dry_run, ir_only=ir_only, **opts)
            for m in metric
        ]

    metric = self._metrics[metric]

    if segment_by is None:
        segment_by = []
    elif isinstance(segment_by, list):
        # Work on a copy: the list is extended below and may be shared with
        # the caller (or with sibling evaluations of a metric list).
        segment_by = list(segment_by)
    else:
        segment_by = [segment_by]

    measures = metric.required_measures

    # Append missing dimensions in declaration order so that the resulting
    # segmentation is deterministic from run to run.
    if metric.required_segmentation:
        segment_by += [
            dimension
            for dimension in dict.fromkeys(metric.required_segmentation)
            if dimension not in segment_by
        ]
    marginal_dimensions = [
        dimension
        for dimension in dict.fromkeys(metric.marginal_dimensions or [])
        if dimension not in segment_by
    ]
    segment_by += marginal_dimensions

    if metric.required_constraints:
        required_constraints = Constraint.from_spec(metric.required_constraints)
        if where is None:
            where = required_constraints
        else:
            where = Constraint.from_spec(where) & required_constraints

    # The measures are always planned as a dry run; the metric itself turns
    # the resulting strategy into values.
    strategy = self.measures.evaluate(
        metric.unit_type,
        measures=measures,
        segment_by=segment_by,
        where=where,
        dry_run=True,
        **opts.pop('measure_opts', {})
    )
    if dry_run:
        return strategy

    result = metric.evaluate(strategy, marginal_dimensions, ir_only=ir_only, **opts)
    if isinstance(result, pd.Series):
        return MeasureSeries(result)
    return MeasureDataFrame(result)
def test_strategy_methods(self):
    """Check unit-type scoping and provider-specific generic extraction."""
    mixed = Constraint.from_spec({'*/unit/a': 1, '*/b': 2, 'c': 3})

    # Scoping to 'unit' is expected to pick up the '*/unit/a' entry as well
    # as the plain 'c' entry.
    scoped_to_unit = mixed.scoped_for_unit_type('unit')
    expected_for_unit = Constraint.from_spec({'a': 1, 'c': 3})
    self.assertEqual(scoped_to_unit, expected_for_unit)

    # For any other unit type only the plain 'c' entry is expected.
    scoped_to_other = mixed.scoped_for_unit_type('other')
    expected_for_other = Constraint.from_spec({'c': 3})
    self.assertEqual(scoped_to_other, expected_for_other)

    # A provider that offers dimension 'b' should receive the '*/b' entry.
    provider = MeasureProvider().provides_dimension('b')
    generic_for_provider = mixed.generic_for_provider(provider)
    expected_generic = Constraint.from_spec({'b': 2})
    self.assertEqual(generic_for_provider, expected_generic)
def wrapped(self, unit_type, measures=None, segment_by=None, where=None, joins=None, stats_registry=None, stats=True, covariates=False, **opts):
    """Normalise the evaluation arguments, then delegate to the wrapped ``f``.

    ``unit_type`` is resolved through ``self.identifier_for_unit``;
    ``measures`` and ``segment_by`` are resolved through ``self.resolve``
    (or default to an empty dict); ``where`` is parsed with
    ``Constraint.from_spec``; ``joins`` defaults to an empty list;
    ``stats_registry`` defaults to ``global_stats_registry``; and the
    remaining options are passed through ``self.opts.process``.
    """
    unit_type = self.identifier_for_unit(unit_type)

    if measures is None:
        measures = {}
    else:
        measures = self.resolve(unit_type=unit_type, features=measures, role='measure')

    if segment_by is None:
        segment_by = {}
    else:
        segment_by = self.resolve(unit_type=unit_type, features=segment_by, role='dimension')

    where = Constraint.from_spec(where)

    if not joins:
        joins = []
    if not stats_registry:
        stats_registry = global_stats_registry

    opts = self.opts.process(**opts)

    return f(
        self,
        unit_type,
        measures=measures,
        segment_by=segment_by,
        where=where,
        joins=joins,
        stats_registry=stats_registry,
        stats=stats,
        covariates=covariates,
        **opts
    )
def test_constraint_specs(self):
    """Check how ``Constraint.from_spec`` parses the supported spec shapes."""
    # Specs expected to parse to a single constraint:
    # (spec, field, value, relation, generic flag)
    atomic_cases = [
        ({'a': 10}, 'a', 10, '==', False),
        ({'a': "10"}, 'a', "10", '==', False),
        ({'*/a': ('>', 10)}, 'a', 10, '>', True),
        ({'a': '>10'}, 'a', '10', '>', False),
        ({'a': {1, 2, 3}}, 'a', {1, 2, 3}, 'in', False),
    ]
    for spec, field, value, relation, is_generic in atomic_cases:
        parsed = Constraint.from_spec(spec=spec)
        self.assertIsInstance(parsed, Constraint)
        self.assertEqual(parsed.field, field)
        self.assertEqual(parsed.value, value)
        self.assertEqual(parsed.relation, relation)
        if is_generic:
            self.assertTrue(parsed.generic)
        else:
            self.assertFalse(parsed.generic)

    # Specs expected to parse to a compound constraint:
    # (spec, compound type, number of operands)
    compound_cases = [
        ({'*/a': [1, 2, "10"]}, And, 3),
        ({'a': {('<', 10), ('>', 11)}}, Or, 2),
        ([{'a': 10, 'field': 11}], And, 2),
        (({'a': 10}, {'field': 11}), Or, 2),
    ]
    for spec, compound_type, operand_count in compound_cases:
        parsed = Constraint.from_spec(spec=spec)
        self.assertIsInstance(parsed, compound_type)
        self.assertEqual(len(parsed.operands), operand_count)
def test_strategy_methods(self):
    """Check unit-type scoping and provider-specific generic extraction."""
    mixed = Constraint.from_spec({'*/unit/a': 1, '*/b': 2, 'c': 3})

    # Scoping to 'unit' is expected to pick up the '*/unit/a' entry as well
    # as the plain 'c' entry.
    scoped_to_unit = mixed.scoped_for_unit_type('unit')
    expected_for_unit = Constraint.from_spec({'a': 1, 'c': 3})
    self.assertEqual(scoped_to_unit, expected_for_unit)

    # For any other unit type only the plain 'c' entry is expected.
    scoped_to_other = mixed.scoped_for_unit_type('other')
    expected_for_other = Constraint.from_spec({'c': 3})
    self.assertEqual(scoped_to_other, expected_for_other)

    # Nothing in the unit-scoped result may still be flagged as generic.
    for operand in mixed.scoped_for_unit_type('unit').operands:
        self.assertFalse(operand.has_generic)

    # A provider that offers dimension 'b' should receive the '*/b' entry.
    provider = MutableMeasureProvider().add_dimension('b')
    generic_for_provider = mixed.generic_for_provider(provider)
    expected_generic = Constraint.from_spec({'b': 2})
    self.assertEqual(generic_for_provider, expected_generic)
def _get_strategy_for_metric(self, metric, segment_by, where): measures = metric.required_measures if metric.required_segmentation: segment_by = segment_by + list(set(metric.required_segmentation).difference(segment_by)) required_marginal_segmentation = list(set(metric.required_marginal_segmentation or []).difference(segment_by)) segment_by = segment_by + required_marginal_segmentation if metric.required_constraints: required_constraints = Constraint.from_spec(metric.required_constraints) if where is None: where = required_constraints else: where = Constraint.from_spec(where) & required_constraints return self.measures.get_strategy( metric.unit_type, measures=measures, segment_by=segment_by, where=where )
def test_constraint_arithmetic(self):
    """Check the ``&`` / ``|`` operators on constraints."""
    first = Constraint.from_spec({'a': 10})
    second = Constraint.from_spec(({'b': 20}, {'c': 30}))
    third = Constraint.from_spec({'d': 40, 'e': 50})

    # Result types of the operators; in the last expression `&` binds
    # tighter than `|`, so the outermost operation is the `|`.
    conjunction = first & second
    self.assertIsInstance(conjunction, And)
    disjunction = first | second
    self.assertIsInstance(disjunction, Or)
    mixed = first | second & third
    self.assertIsInstance(mixed, Or)

    # Commutativity
    self.assertEqual(first & second, second & first)
    self.assertEqual(first | second, second | first)

    # Associativity
    self.assertEqual((first & second) & third, first & (second & third))
    self.assertEqual((first | second) | third, first | (second | third))
def test_generic_scoped(self):
    """Check the scoped/generic flags and projections of constraints."""
    # Plain field name: scoped only.
    scoped_only = Constraint.from_spec({'a': 10})
    self.assertTrue(scoped_only.has_scoped)
    self.assertFalse(scoped_only.has_generic)
    self.assertEqual(scoped_only.scoped, scoped_only)

    # '*/'-prefixed field name: generic only.
    generic_only = Constraint.from_spec({'*/a': 10})
    self.assertFalse(generic_only.has_scoped)
    self.assertTrue(generic_only.has_generic)
    self.assertEqual(generic_only.generic, generic_only)

    # Both kinds in one spec: each projection keeps only its own part.
    combined = Constraint.from_spec({'a': 10, '*/b': 20})
    self.assertTrue(combined.has_scoped)
    self.assertTrue(combined.has_generic)
    self.assertEqual(combined.scoped, Constraint.from_spec({'a': 10}))
    self.assertEqual(combined.generic, Constraint.from_spec({'b': 20}))

    # A tuple spec mixing a generic and a scoped entry must be rejected.
    with self.assertRaises(ValueError):
        Constraint.from_spec(({'*/b': 20}, {'c': 30}))
def wrapped(self, unit_type, measures=None, segment_by=None, where=None, joins=None, **opts):
    """Normalise the evaluation arguments, then delegate to the wrapped ``f``.

    ``unit_type`` is resolved through ``self.identifier_for_unit``;
    ``measures`` and ``segment_by`` are resolved through ``self.resolve``
    (or default to an empty dict); ``where`` is parsed with
    ``Constraint.from_spec``; ``joins`` defaults to an empty list. Extra
    options are forwarded untouched.
    """
    unit_type = self.identifier_for_unit(unit_type)

    if measures is None:
        measures = {}
    else:
        measures = self.resolve(measures, kind='measure')

    if segment_by is None:
        segment_by = {}
    else:
        segment_by = self.resolve(segment_by, kind='dimension')

    where = Constraint.from_spec(where)

    if not joins:
        joins = []

    return f(
        self,
        unit_type,
        measures=measures,
        segment_by=segment_by,
        where=where,
        joins=joins,
        **opts
    )
def wrapped(self, unit_type, measures=None, segment_by=None, where=None, joins=None, stats=True, covariates=False, context=None, stats_registry=None, **opts):
    """Normalise the evaluation arguments, then delegate to the wrapped ``f``.

    ``unit_type`` is resolved through ``self.identifier_for_unit``. A lone
    string or ``_ProvidedFeature`` given for ``measures`` / ``segment_by``
    is wrapped in a list before being resolved through ``self.resolve``;
    ``None`` becomes an empty ``SequenceMap``. ``where`` is parsed with
    ``Constraint.from_spec``; ``joins`` defaults to an empty list,
    ``stats_registry`` to ``global_stats_registry`` and ``context`` to an
    empty dict. Extra options are forwarded untouched.
    """
    single_feature_types = (str, _ProvidedFeature)

    unit_type = self.identifier_for_unit(unit_type)

    if isinstance(measures, single_feature_types):
        measures = [measures]
    if measures is None:
        measures = SequenceMap()
    else:
        measures = self.resolve(unit_type=unit_type, features=measures, role='measure')

    if isinstance(segment_by, single_feature_types):
        segment_by = [segment_by]
    if segment_by is None:
        segment_by = SequenceMap()
    else:
        segment_by = self.resolve(unit_type=unit_type, features=segment_by, role='dimension')

    where = Constraint.from_spec(where)

    if not joins:
        joins = []
    if not stats_registry:
        stats_registry = global_stats_registry
    if not context:
        context = {}

    # NOTE: option processing via `self.opts.process(**opts)` is disabled
    # here; `opts` reaches `f` exactly as supplied by the caller.
    return f(
        self,
        unit_type,
        measures=measures,
        segment_by=segment_by,
        where=where,
        joins=joins,
        stats=stats,
        covariates=covariates,
        context=context,
        stats_registry=stats_registry,
        **opts
    )
def from_spec(cls, registry, unit_type, measures=None, segment_by=None, where=None, **opts):
    """Build an evaluation strategy for the requested measures and dimensions.

    The work is split between features available directly on ``unit_type``
    (those whose ``via`` is falsy) and features reached through another
    unit type. The latter are grouped per next unit type and planned by
    calling ``from_spec`` recursively; the resulting strategies are joined
    onto the strategy for the current unit type.

    Args:
        registry: Object used to resolve identifiers, measures, dimensions
            and (reverse) foreign keys, and to find the provisions for the
            current unit type.
        unit_type: Unit type to evaluate for; resolved through
            ``registry._resolve_identifier``.
        measures: Iterable of measures to resolve; ``None`` means none.
        segment_by: Iterable of dimensions to resolve; ``None`` means none.
        where: Constraint spec, parsed with ``Constraint.from_spec``.
        **opts: Not used locally; forwarded to the recursive ``from_spec``
            calls for the next unit types.

    Returns:
        The strategy built from the first provision, with the strategies of
        all remaining provisions joined onto it.

    Raises:
        RuntimeError: If the strategy for a next unit type cannot be
            attached to any strategy of the current unit type.

    NOTE(review): ``evaluations[0]`` raises ``IndexError`` when
    ``registry._find_optimal_provision`` yields no provisions -- confirm
    whether that can happen.
    """
    # Step 0: Resolve applicable measures and dimensions
    unit_type = registry._resolve_identifier(unit_type)
    measures = [] if measures is None else measures
    segment_by = [] if segment_by is None else segment_by
    measures = [
        registry._resolve_measure(unit_type, measure)
        for measure in measures
    ]
    segment_by = [
        registry._resolve_dimension(unit_type, dimension)
        for dimension in segment_by
    ]
    where = Constraint.from_spec(where)
    # Dimensions referenced by the constraints scoped to this unit type that
    # were not explicitly requested in `segment_by`; they are resolved in
    # their `as_implicit` form.
    where_dimensions = [
        (registry._resolve_dimension(unit_type, dimension).as_implicit)
        for dimension in where.scoped_for_unit_type(unit_type).dimensions
        if dimension not in segment_by
    ]

    # Step 1: Collect measures and dimensions into groups based on current unit_type
    # and next unit_type
    current_evaluation = DimensionBundle(unit_type=unit_type, dimensions=[], measures=[])
    next_evaluations = {}

    # `kind` selects which list of a bundle ('measures' or 'dimensions')
    # receives each feature. The bundles are written to through `_asdict()`,
    # which relies on the returned dict holding the bundle's own list
    # objects -- presumably DimensionBundle is a namedtuple; verify.
    def collect_dimensions(dimensions, kind='measures', for_constraint=False):
        for dimension in dimensions:
            if not dimension.via:
                current_evaluation._asdict()[kind].append(dimension)
            elif (  # Handle reverse foreign key joins
                    (for_constraint or kind == 'measures')
                    and dimension.next_unit_type in
                    registry.reverse_foreign_keys_for_unit(unit_type)):
                next_unit_type = registry._resolve_reverse_foreign_key(
                    unit_type, dimension.next_unit_type)
                if next_unit_type not in next_evaluations:
                    # The bundle records the *current* unit type here; Step 3
                    # recognises a reverse join by the bundle's unit type
                    # differing from its key in `next_evaluations`.
                    next_evaluations[next_unit_type] = DimensionBundle(
                        unit_type=unit_type, dimensions=[], measures=[])
                next_evaluations[next_unit_type]._asdict()[kind].append(
                    dimension.via_next)
            else:
                next_unit_type = registry._resolve_foreign_key(
                    unit_type, dimension.next_unit_type)
                if next_unit_type not in next_evaluations:
                    next_evaluations[next_unit_type] = DimensionBundle(
                        unit_type=next_unit_type, dimensions=[], measures=[])
                next_evaluations[next_unit_type]._asdict()[kind].append(
                    dimension.via_next)

    collect_dimensions(measures, kind='measures')
    collect_dimensions(segment_by, kind='dimensions')
    collect_dimensions(where_dimensions, kind='dimensions', for_constraint=True)

    # Add required dimension for joining in next unit_types
    for dimension_bundle in next_evaluations.values():
        fk = registry._resolve_foreign_key(unit_type, dimension_bundle.unit_type)
        if fk not in current_evaluation.dimensions:
            current_evaluation.dimensions.append(fk.as_private)

    # Step 2: Create optimal joins for current unit_type
    provisions = registry._find_optimal_provision(
        unit_type=unit_type,
        measures=current_evaluation.measures,
        dimensions=current_evaluation.dimensions)
    evaluations = []
    for provision in provisions:
        # Each provider receives the generic constraints that apply to it,
        # plus (as private dimensions) whatever those constraints reference
        # that the provision does not already include.
        generic_constraints = where.generic_for_provider(
            provision.provider)
        generic_constraint_dimensions = [
            provision.provider.resolve(dimension).as_private
            for dimension in generic_constraints.dimensions
            if not provision.dimensions or dimension not in provision.dimensions
        ]
        evaluations.append(
            cls(registry=registry,
                provider=provision.provider,
                unit_type=unit_type,
                measures=provision.measures,
                segment_by=provision.dimensions + generic_constraint_dimensions,
                where=generic_constraints,
                join_prefix=provision.join_prefix))

    # Step 3: For each next unit_type, recurse problem and join into above query
    for foreign_key, dim_bundle in next_evaluations.items():
        foreign_strategy = cls.from_spec(registry=registry,
                                         unit_type=foreign_key,
                                         measures=dim_bundle.measures,
                                         segment_by=dim_bundle.dimensions,
                                         where=where.via_next(
                                             foreign_key.name),
                                         **opts)
        if foreign_key != dim_bundle.unit_type:  # Reverse foreign key join
            foreign_key = dim_bundle.unit_type
            foreign_strategy.unit_type = dim_bundle.unit_type
        added = False
        # Attach the foreign strategy to every current-unit strategy whose
        # segmentation contains a matching unit identifier; the `break` only
        # leaves the inner loop, i.e. at most one join per strategy.
        for sub_strategy in evaluations:
            for dimension in sub_strategy.segment_by:
                if isinstance(dimension, _StatisticalUnitIdentifier
                              ) and dimension.matches(foreign_key):
                    sub_strategy.add_join(foreign_key, foreign_strategy)
                    added = True
                    break
        if not added:
            raise RuntimeError("Could not add foreign strategy: {}".format(
                foreign_strategy))

    # The first evaluation becomes the base strategy; all others are joined
    # onto it on the current unit type.
    strategy = evaluations[0]
    for sub_strategy in evaluations[1:]:
        strategy.add_join(unit_type, sub_strategy)

    strategy.where = And.from_operands(strategy.where, where.scoped_applicable)

    # Step 4: Mark any resolved where dependencies as private, unless otherwise
    # requested in `segment_by`
    for dimension in strategy.segment_by:
        if dimension.implicit and dimension in where.scoped_applicable.dimensions:
            # Entries are replaced in place by index; the list length does
            # not change while it is being iterated.
            index = strategy.segment_by.index(dimension)
            strategy.segment_by[index] = strategy.segment_by[
                index].as_private

    # Step 5: Return EvaluationStrategy, and profit.
    return strategy
def test_resolvability(self): c = Constraint.from_spec({'unit/a': 1, 'unit/b': 2, 'type/c': 3}) self.assertTrue(c.via_next('unit').resolvable) c = Constraint.from_spec(({'unit/a': 1, 'unit/b': 2}, {'type/c': 3})) self.assertFalse(c.via_next('unit').resolvable)
def require_constraints(self, **constraints):
    """AND the given constraint spec into the required constraints.

    The keyword arguments are parsed with ``Constraint.from_spec`` and
    combined into ``self._required_constraints`` using ``&=``. Returns
    ``self`` so that calls can be chained.
    """
    additional = Constraint.from_spec(constraints)
    self._required_constraints &= additional
    return self