示例#1
0
    def test_factor_evaluation_edge_cases(self, materializer):
        """Exercise the kind-resolution edge cases of `_evaluate_factor`.

        The factor cache is reset before each error case so that a result
        cached by an earlier call cannot mask the behaviour under test.
        """
        # A factor explicitly declared as categorical keeps that kind even if
        # its values would otherwise be treated as numerical.
        ev_factor = materializer._evaluate_factor(
            Factor('a', eval_method='lookup', kind='categorical'),
            ModelSpec([]),
            drop_rows=set(),
        )
        assert ev_factor.kind.value == 'categorical'

        # Any other mismatch between declared and detected kind is an error.
        materializer.factor_cache = {}
        with pytest.raises(FactorEncodingError):
            materializer._evaluate_factor(
                Factor('A', eval_method='lookup', kind='numerical'),
                ModelSpec([]),
                drop_rows=[],
            )

        # If the model spec already records an encoding for the factor, a
        # conflicting kind must be rejected.
        materializer.factor_cache = {}
        with pytest.raises(FactorEncodingError):
            materializer._evaluate_factor(
                Factor('a', eval_method='lookup', kind='numerical'),
                ModelSpec([], encoder_state={'a': ('categorical', {})}),
                drop_rows=[],
            )

        # Factors without a usable evaluation method cannot be evaluated.
        # The call is not wrapped in `assert`: it is expected to raise, and an
        # `assert` here would only obscure the failure if it ever returned.
        materializer.factor_cache = {}
        with pytest.raises(FactorEvaluationError):
            materializer._evaluate_factor(
                Factor('a'),
                ModelSpec([]),
                drop_rows=set(),
            )
示例#2
0
    def test_to_terms(self):
        """A lone factor converts into exactly one single-factor term."""
        produced_terms = Factor('a').to_terms()
        assert len(produced_terms) == 1

        # Pull out the only term and check it wraps the original factor.
        sole_term = next(iter(produced_terms))
        assert len(sole_term.factors) == 1

        sole_factor = next(iter(sole_term.factors))
        assert sole_factor == Factor('a')
示例#3
0
    def test_attributes(self):
        """`kind` and `eval_method` default to UNKNOWN and accept names."""
        default_factor = Factor('a')
        constant_factor = Factor('a', kind='constant')
        lookup_factor = Factor('a', eval_method='lookup')

        # Kind: unknown unless given explicitly.
        assert default_factor.kind is Factor.Kind.UNKNOWN
        assert constant_factor.kind is Factor.Kind.CONSTANT

        # Evaluation method: unknown unless given explicitly.
        assert default_factor.eval_method is Factor.EvalMethod.UNKNOWN
        assert lookup_factor.eval_method is Factor.EvalMethod.LOOKUP
示例#4
0
    def test_sort(self):
        """Factors order by expression; comparing to a non-factor fails."""
        first, second, third = (Factor(name) for name in ('a', 'b', 'c'))

        assert first < second
        assert second < third
        assert first < third

        # Ordering against an unrelated type must raise rather than guess.
        with pytest.raises(TypeError):
            first < 1
示例#5
0
 def evaled_factors(self):
     """Return a categorical factor `A` and a numerical factor `b`.

     Both are backed by the same four-element integer series; only the
     categorical one is marked as spanning the intercept.
     """
     categorical = EvaluatedFactor(
         Factor('A'),
         pandas.Series([1, 2, 3, 4]),
         kind='categorical',
         spans_intercept=True,
     )
     numerical = EvaluatedFactor(
         Factor('b'),
         pandas.Series([1, 2, 3, 4]),
         kind='numerical',
         spans_intercept=False,
     )
     return [categorical, numerical]
示例#6
0
def differentiate_term(term, vars, use_sympy=False):
    """Differentiate `term` with respect to each variable in `vars` in turn.

    Args:
        term: The term whose `factors` are differentiated.
        vars: Iterable of variables to differentiate by, applied in order.
        use_sympy: Forwarded to `_factor_symbols` and
            `_differentiate_factors`.

    Returns:
        A `Term` of the remaining factors. If some variable appears in none
        of the current factors the result is the literal term `0`; if no
        factors remain the result is the literal term `1`.
    """
    remaining = term.factors

    for variable in vars:
        # Factors that mention the variable are the only ones that change.
        dependent = {
            candidate
            for candidate in remaining
            if variable in _factor_symbols(candidate, use_sympy=use_sympy)
        }
        if not dependent:
            # The term is constant with respect to this variable.
            return Term({Factor('0', eval_method='literal')})

        untouched = remaining.difference(dependent)
        derived = _differentiate_factors(dependent,
                                         variable,
                                         use_sympy=use_sympy)
        remaining = untouched.union(derived)

    if remaining:
        return Term(remaining)
    return Term({Factor('1', eval_method='literal')})
示例#7
0
 def test_equality(self):
     """Factors compare by expression, ignoring kind and eval method."""
     plain = Factor('a')
     assert plain == 'a'
     assert plain != 1

     # Differing kinds do not affect equality.
     as_constant = Factor('a', kind='constant')
     as_numerical = Factor('a', kind='numerical')
     assert as_constant == as_numerical

     # Differing evaluation methods do not affect equality either.
     as_literal = Factor('a', eval_method='literal')
     as_lookup = Factor('a', eval_method='lookup')
     assert as_literal == as_lookup
示例#8
0
def _differentiate_factors(factors, var, use_sympy=False):
    """Differentiate the product of `factors` with respect to `var`.

    Args:
        factors: Collection of factors whose product is differentiated.
        var: The variable to differentiate by.
        use_sympy: When true, the product of the factor expressions is
            differentiated symbolically with `sympy`. Otherwise exactly one
            factor is expected and its derivative is taken to be 1.

    Returns:
        An empty set when the derivative equals 1, otherwise a one-element
        set holding a factor for the parenthesised derivative.
    """
    if not use_sympy:
        # Without sympy only the single-factor case is supported.
        assert len(factors) == 1
        derivative = 1
        method = next(iter(factors)).eval_method
    else:
        product = '(' + ') * ('.join(item.expr for item in factors) + ')'
        derivative = sympy.S(product).diff(var)
        method = 'python'

    # A derivative of 1 contributes nothing to the surrounding product.
    if derivative == 1:
        return set()

    rendered = str(derivative)
    return {Factor(f'({rendered})', eval_method=method)}
示例#9
0
    def test_encoding_edge_cases(self, materializer):
        """Check encoding of constants and of dictionary-valued factors."""
        # A constant value is encoded as a column named after the factor.
        constant_factor = EvaluatedFactor(
            Factor("10", eval_method='literal', kind='constant'),
            values=10,
            kind='constant',
        )
        constant_encoded = materializer._encode_evaled_factor(
            factor=constant_factor,
            spec=ModelSpec([]),
            drop_rows=[],
        )
        assert list(constant_encoded['10']) == [10, 10, 10]

        # Numerical dictionary values are exposed as `<factor>[<key>]`.
        numerical_factor = EvaluatedFactor(
            Factor("A", eval_method='python', kind='numerical'),
            values={
                'a': [1, 2, 3],
                'b': [4, 5, 6],
                '__metadata__': None
            },
            kind='numerical',
        )
        numerical_encoded = materializer._encode_evaled_factor(
            factor=numerical_factor,
            spec=ModelSpec([]),
            drop_rows=[],
        )
        assert list(numerical_encoded['A[a]']) == [1, 2, 3]

        # Categorical dictionary values get one entry per key and level.
        categorical_factor = EvaluatedFactor(
            Factor("B", eval_method='python', kind='categorical'),
            values={'a': ['a', 'b', 'c']},
            kind='categorical',
        )
        categorical_encoded = materializer._encode_evaled_factor(
            factor=categorical_factor,
            spec=ModelSpec([]),
            drop_rows=[],
        )
        expected_names = ['B[a][T.a]', 'B[a][T.b]', 'B[a][T.c]']
        assert list(categorical_encoded) == expected_names
示例#10
0
    def _evaluate_factor(self, factor, spec, drop_rows):
        """Evaluate `factor`, caching the result by its expression.

        Args:
            factor: The factor to evaluate; its `expr`, `eval_method`,
                `metadata` and `kind` attributes are read.
            spec: The model specification; its `encoder_state` and
                `na_action` attributes are read.
            drop_rows: Passed through to `_check_for_nulls`.

        Returns:
            The `EvaluatedFactor` stored in `self.factor_cache` for
            `factor.expr`.

        Raises:
            FactorEvaluationError: If the factor's evaluation method is not
                one of 'lookup', 'python' or 'literal'.
            FactorEncodingError: If the detected kind conflicts with the kind
                declared on the factor (other than a 'categorical'
                override), or with the kind recorded in
                `spec.encoder_state`.
        """
        # Only evaluate on a cache miss; the cache is keyed by expression.
        if factor.expr not in self.factor_cache:
            # Dispatch on the evaluation method to obtain the raw value.
            if factor.eval_method.value == 'lookup':
                value = self._lookup(factor.expr)
            elif factor.eval_method.value == 'python':
                value = self._evaluate(factor.expr, factor.metadata, spec)
            elif factor.eval_method.value == 'literal':
                # Literals are wrapped immediately as constants, which skips
                # the kind detection below.
                value = EvaluatedFactor(factor,
                                        self._evaluate(factor.expr,
                                                       factor.metadata, spec),
                                        kind='constant')
            else:
                raise FactorEvaluationError(
                    f"Evaluation method {factor.eval_method.value} not recognised for factor {factor.expr}."
                )

            if not isinstance(value, EvaluatedFactor):
                # Determine the kind: an explicit `__kind__` entry wins, then
                # categorical detection, otherwise numerical.
                if isinstance(value, dict) and '__kind__' in value:
                    kind = value['__kind__']
                    spans_intercept = value.get('__spans_intercept__', False)
                elif self._is_categorical(value):
                    kind = 'categorical'
                    spans_intercept = True
                else:
                    kind = 'numerical'
                    spans_intercept = False
                # Reconcile with the kind declared on the factor: a declared
                # 'categorical' overrides detection, any other mismatch is
                # an error.
                if factor.kind is not Factor.Kind.UNKNOWN and factor.kind.value != kind:
                    if factor.kind.value == 'categorical':
                        kind = factor.kind.value
                    else:
                        raise FactorEncodingError(
                            f"Factor is expecting to be of kind '{factor.kind.value}' but is actually of kind '{kind}'."
                        )
                # The kind must also agree with any previously recorded
                # encoder state for this expression.
                # NOTE(review): this is an identity comparison against a
                # `Factor.Kind` member, so it presumably relies on
                # `encoder_state` storing `Factor.Kind` members rather than
                # plain strings -- confirm against `ModelSpec`.
                if factor.expr in spec.encoder_state and Factor.Kind(
                        kind) is not spec.encoder_state[factor.expr][0]:
                    raise FactorEncodingError(
                        f"Factor kind `{kind}` does not match model specification of `{spec.encoder_state[factor.expr][0]}`."
                    )
                value = EvaluatedFactor(
                    factor=factor,
                    values=value,
                    kind=kind,
                    spans_intercept=spans_intercept,
                )
            # Null handling runs once per expression, just before caching.
            self._check_for_nulls(factor.expr, value.values, spec.na_action,
                                  drop_rows)
            self.factor_cache[factor.expr] = value
        return self.factor_cache[factor.expr]
示例#11
0
def _differentiate_factors(factors, var, use_sympy=False):
    """Differentiate the product of `factors` with respect to `var`.

    Args:
        factors: Collection of factors whose product is differentiated.
        var: The variable to differentiate by.
        use_sympy: When true, the product of the factor expressions is
            differentiated symbolically with `sympy`. Otherwise exactly one
            factor is expected and its derivative is taken to be 1.

    Returns:
        An empty set when the derivative equals 1, otherwise a one-element
        set holding a factor for the parenthesised derivative.

    Raises:
        ImportError: If `use_sympy` is true and `sympy` is not installed.
    """
    if use_sympy:
        # Keep the `try` limited to the import so that only a missing sympy
        # installation is reported with the installation hint.
        try:
            import sympy
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                "`sympy` is not available. Install it using `pip install formulaic[calculus]` or `pip install sympy`."
            ) from exc
        expr = sympy.S('(' + ') * ('.join(factor.expr
                                          for factor in factors) +
                       ')').diff(var)
        eval_method = 'python'
    else:
        # Without sympy only the single-factor case is supported.
        assert len(factors) == 1
        expr = 1
        eval_method = next(iter(factors)).eval_method

    # A derivative of 1 contributes nothing to the surrounding product.
    if expr == 1:
        return set()
    return {Factor(f'({str(expr)})', eval_method=eval_method)}
 def scoped_factor(self):
     """Return a scoped factor wrapping the factor `a`."""
     wrapped = Factor('a')
     return ScopedFactor(wrapped)
    def test_sort(self, scoped_factor, scoped_factor_reduced):
        """Reduced sorts before unreduced; `a` sorts before `b`."""
        assert scoped_factor_reduced < scoped_factor

        later_factor = ScopedFactor(Factor('b'))
        assert scoped_factor < later_factor

        # Ordering against an unrelated type must raise rather than guess.
        with pytest.raises(TypeError):
            scoped_factor < 1
 def scoped_factor_reduced(self):
     """Return a reduced scoped factor wrapping the factor `a`."""
     wrapped = Factor('a')
     return ScopedFactor(wrapped, reduced=True)
示例#15
0
 def test_hash(self):
     """A factor hashes identically to its expression string."""
     expression = 'a'
     assert hash(Factor(expression)) == hash(expression)
示例#16
0
 def term1(self):
     """Return a term built from the factors `c` and `b`."""
     members = [Factor('c'), Factor('b')]
     return Term(members)
示例#17
0
 def term2(self):
     """Return a term built from the factors `c` and `d`."""
     members = [Factor('c'), Factor('d')]
     return Term(members)
示例#18
0
 def factor_lookup(self):
     """Return the factor `a` constructed with `kind='lookup'`.

     NOTE(review): other code passes 'lookup' as `eval_method` rather than
     `kind`; confirm which keyword this version of `Factor` expects.
     """
     lookup_factor = Factor('a', kind='lookup')
     return lookup_factor
 def test_equality(self, ev_factor):
     """The fixture equals a matching evaluated factor but not a string."""
     counterpart = EvaluatedFactor(Factor('a'), [4, 5, 6], kind='numerical')
     assert ev_factor == counterpart
     assert ev_factor != 'a'
示例#20
0
 def test_repr(self):
     """A factor's repr is its bare expression."""
     rendered = repr(Factor('a'))
     assert rendered == 'a'
示例#21
0
 def factor_unknown(self):
     """Return a factor named `unknown` with no further arguments."""
     plain_factor = Factor('unknown')
     return plain_factor
 def test_repr(self, ev_factor):
     """An evaluated factor's repr matches that of the factor `a`."""
     expected = repr(Factor('a'))
     assert repr(ev_factor) == expected
示例#23
0
 def kind(self, kind):
     """Set the kind of this evaluated factor, rejecting unknown kinds.

     Args:
         kind: A value accepted by `Factor.Kind(...)`.

     Raises:
         ValueError: If `kind` is falsy, the string 'unknown', or the
             `Factor.Kind.UNKNOWN` member.
     """
     # The enum member is checked explicitly: it would otherwise slip past
     # the guard unless it is falsy or compares equal to 'unknown'.
     if not kind or kind == 'unknown' or kind is Factor.Kind.UNKNOWN:
         raise ValueError(
             "`EvaluatedFactor` instances must have a known kind.")
     self._kind = Factor.Kind(kind)
    def test_sort(self, ev_factor):
        """The fixture sorts before an evaluated factor named `b`."""
        later_factor = EvaluatedFactor(Factor('b'), [4, 5, 6],
                                       kind='numerical')
        assert ev_factor < later_factor

        # Ordering against an unrelated type must raise rather than guess.
        with pytest.raises(TypeError):
            ev_factor < 1
示例#25
0
 def factor_literal(self):
     """Return the factor `"string"` constructed with `kind='literal'`.

     NOTE(review): other code passes 'literal' as `eval_method` rather than
     `kind`; confirm which keyword this version of `Factor` expects.
     """
     literal_factor = Factor('"string"', kind='literal')
     return literal_factor