Пример #1
0
def partition_terms(terms, metadata):
    """Group `terms` by the set of numeric factors each one contains.

    Every term is keyed by the `OrderedSet` of its factors whose column
    in `metadata` satisfies `is_numeric_col`, and the keyed terms are
    handed to `group`.

    Returns the items of that grouping, arranged so that the entry keyed
    by the empty set (terms with no numeric factors) precedes all the
    others.
    """
    assert type(terms) == OrderedSet
    assert type(metadata) == Metadata

    def numeric_factors(term):
        # The subset of the term's factors that refer to numeric columns.
        numeric = (f for f in term.factors
                   if is_numeric_col(metadata.column(f)))
        return OrderedSet(*numeric)

    # The full term (numeric factors included) is kept as the grouped
    # value so that the order in which its non-numeric and numeric
    # factors originally appeared is remembered. Patsy is believed to do
    # something similar.
    keyed_terms = []
    for term in terms:
        keyed_terms.append((numeric_factors(term), term))
    groups = group(keyed_terms)

    # Order the groups: the one without numeric factors goes first. The
    # remaining groups keep the order in which a term with exactly that
    # set of numeric factors first shows up in `terms` (guaranteed
    # because `group` is order aware).
    # NOTE(review): this relies on `partition` returning the items that
    # fail the predicate first -- confirm against its definition.
    no_numeric_factors = OrderedSet()

    def has_numeric_factors(item):
        return item[0] != no_numeric_factors

    first, rest = partition(has_numeric_factors, groups.items())
    return first + rest
Пример #2
0
              Z_0=np.array([[-1.], [-1.], [1.]]),
              J_0=np.array([0, 1, 1]),
              y_obs=np.array([1., 2., 3.]))),
    ])
def test_designmatrix(formula_str, df, metadata_cols, contrasts, expected):
    """Check that `makedata` yields exactly the expected named arrays.

    Metadata is built from `metadata_cols` when given, otherwise it is
    derived from `df`. The result must have the same keys as `expected`,
    and every entry must match in both dtype and contents.
    """
    if metadata_cols is None:
        metadata = metadata_from_df(df)
    else:
        metadata = metadata_from_cols(metadata_cols)

    formula = parse(formula_str)
    data = makedata(formula, df, metadata, contrasts)

    assert set(data.keys()) == set(expected.keys())
    for name in expected.keys():
        actual = data[name]
        wanted = expected[name]
        # Compare dtypes explicitly, in addition to the contents check.
        assert actual.dtype == wanted.dtype
        assert_equal(actual, wanted)


@pytest.mark.parametrize(
    ('formula_str', 'expected_formula'),
    [
        # Intercept only.
        ('y ~ 1', Formula('y', OrderedSet(_1), [])),
        # Intercept plus a single main effect.
        (
            'y ~ 1 + x',
            Formula('y', OrderedSet(_1, Term(OrderedSet('x'))), []),
        ),
        # A repeated term appears only once in the parsed formula.
        (
            'y ~ x + x',
            Formula('y', OrderedSet(Term(OrderedSet('x'))), []),
        ),
        # Interaction of two factors.
        (
            'y ~ x1 : x2',
            Formula('y', OrderedSet(Term(OrderedSet('x1', 'x2'))), []),
        ),
        # The interaction distributes over the parenthesised sum.
        (
            'y ~ (x1 + x2) : x3',
            Formula(
                'y',
                OrderedSet(
                    Term(OrderedSet('x1', 'x3')),
                    Term(OrderedSet('x2', 'x3')),
                ),
                [],
            ),
        ),
    ],
)
def test_parser(formula_str, expected_formula):
    """Parsing `formula_str` must produce exactly `expected_formula`."""
    assert parse(formula_str) == expected_formula

Пример #3
0
 def numeric_factors(term):
     """Return an OrderedSet of the factors of `term` with numeric columns.

     A factor is kept when `is_numeric_col` accepts the column that
     `metadata` (from the enclosing scope) reports for it; the factors
     are passed on in the order they occur in `term.factors`.
     """
     selected = []
     for factor in term.factors:
         if is_numeric_col(metadata.column(factor)):
             selected.append(factor)
     return OrderedSet(*selected)
Пример #4
0
 def drop_numeric_factors(term):
     """Build a new Term from `term` minus the shared numeric factors.

     Factors found in `shared_numeric_factors` (from the enclosing scope)
     are left out; the remaining ones are passed on in their original
     order.
     """
     remaining = (
         factor
         for factor in term.factors
         if factor not in shared_numeric_factors
     )
     return Term(OrderedSet(*remaining))