Exemplo n.º 1
0
def coef_names(terms, metadata, code_lengths):
    """Return one coefficient name per product column generated by `terms`.

    The terms are first coded with `code_terms`; every resulting coded
    interaction is expanded into product columns, and each product column
    is mapped to its coefficient name.

    `terms` must be an `OrderedSet` and `metadata` a `Metadata` instance
    (both checked with assertions). `code_lengths` is forwarded untouched
    to `coded_interaction_to_product_cols`.
    """
    assert type(terms) == OrderedSet
    assert type(metadata) == Metadata
    interactions = code_terms(terms, metadata)
    columns = join(
        coded_interaction_to_product_cols(interaction, metadata, code_lengths)
        for interaction in interactions)
    return [product_col_to_coef_name(column) for column in columns]
Exemplo n.º 2
0
def allfactors(formula):
    """Collect every factor referenced by `formula`, in order.

    The result is the concatenation of: the response, the factors of the
    formula's own terms, and, for each group, the factors of the group's
    terms followed by the group's columns.

    `formula` must be a `Formula` instance (checked with an assertion).
    """
    assert type(formula) == Formula

    def factors_of(term_seq):
        # Flatten the factors of each term into one sequence.
        return join(list(t.factors) for t in term_seq)

    head = [formula.response] + factors_of(formula.terms)
    from_groups = join(
        factors_of(grp.terms) + grp.columns
        for grp in formula.groups)
    return head + from_groups
Exemplo n.º 3
0
def designmatrix(terms, df, metadata, contrasts):
    """Build the design matrix for `terms` evaluated against `df`.

    The terms are coded with `code_terms`, each coded interaction is
    expanded into product columns, and every product column is executed
    against `df` to produce one column of the result.

    Parameters:
        terms: an `OrderedSet` of terms (checked with an assertion).
        df: the data; only `len(df)` is used here directly, the rest is
            delegated to `execute_product_col`.
        metadata: forwarded to the coding helpers.
        contrasts: forwarded to `code_lengths` and `execute_product_col`.

    Returns:
        A NumPy array with `len(df)` rows and one column per product
        column; an empty `(len(df), 0)` array when there are no columns.

    Side effects:
        Prints a warning when the matrix is non-empty and its rank differs
        from its number of columns.
    """
    assert type(terms) == OrderedSet
    coded_interactions = code_terms(terms, metadata)
    # The code lengths depend only on `contrasts`, so compute them once
    # rather than once per coded interaction.
    # NOTE(review): assumes `code_lengths` is a pure function -- confirm.
    lengths = code_lengths(contrasts)
    product_cols = join(coded_interaction_to_product_cols(code, metadata, lengths)
                        for code in coded_interactions)
    N = len(df)
    arrs = [execute_product_col(pcol, df, metadata, contrasts) for pcol in product_cols]
    # np.stack rejects an empty sequence, hence the explicit N x 0 fallback.
    X = np.stack(arrs, axis=1) if arrs else np.empty((N, 0))
    assert X.shape[0] == N
    # The rank check is skipped for matrices with no rows or no columns.
    if X.shape[0] > 0 and X.shape[1] > 0 and np.linalg.matrix_rank(X) != X.shape[1]:
        print('WARNING: Design matrix may not be full rank.')
    return X
Exemplo n.º 4
0
def code_group_of_terms(terms, shared_numeric_factors):
    """Code a group of terms that all share the same numeric factors.

    The shared numeric factors are stripped from every term, the remaining
    (categorical) terms are coded with `code_categorical_terms`, and a
    `NumericCoding` for each shared numeric factor is then merged back
    into every resulting coding, following the factor order of the
    originating term.

    `terms` must be a list of `Term` and `shared_numeric_factors` an
    `OrderedSet` (checked with assertions). The per-term results are
    combined with `join`.
    """
    assert type(terms) == list
    assert all(type(term) == Term for term in terms)
    assert type(shared_numeric_factors) == OrderedSet

    # Every term must contain all of the shared numeric factors. (Terms
    # should also contain no numeric factors that are not mentioned in
    # `shared_numeric_factors`, but that is not checked here.)
    assert all(
        numeric in term.factors
        for term in terms
        for numeric in shared_numeric_factors)

    def strip_shared_numerics(term):
        kept = [factor for factor in term.factors
                if factor not in shared_numeric_factors]
        return Term(OrderedSet(*kept))

    categorical_only = [strip_shared_numerics(term) for term in terms]
    per_term_codings = code_categorical_terms(categorical_only)

    numeric_codings = {
        factor: NumericCoding(factor) for factor in shared_numeric_factors
    }

    # Merge the shared numeric codings into the coding of a categorical
    # interaction, respecting the factor order of the source term.
    #
    # e.g. term   = Term(<a,x,b>)
    #      coding = (b+,)
    # gives:
    #      (x,b+)
    # (assuming the shared numeric factors are ['x']).
    def with_numeric_codings(term, coding):
        # Map factor -> coding for the categorical codings plus the
        # shared numeric ones.
        by_factor = dict({c.factor: c for c in coding}, **numeric_codings)
        # Walk the term's factors in order, keeping those that have a
        # coding; some factors of the term may have none.
        ordered = [by_factor[factor] for factor in term.factors
                   if factor in by_factor]
        assert len(ordered) == len(by_factor)
        return ordered

    # Guard against zip silently dropping unmatched entries.
    assert len(terms) == len(per_term_codings)
    extended = []
    for term, codings in zip(terms, per_term_codings):
        extended.append(
            [with_numeric_codings(term, coding) for coding in codings])
    return join(extended)
Exemplo n.º 5
0
def code_terms(terms, metadata):
    """Code `terms` group by group and join the results.

    `partition_terms` splits the terms into (shared numeric factors,
    terms) pairs; each group's terms are passed through `sort_by_order`
    and coded with `code_group_of_terms`.

    `metadata` must be a `Metadata` instance (checked with an assertion).
    """
    assert type(metadata) == Metadata
    partitions = partition_terms(terms, metadata)
    return join(
        code_group_of_terms(sort_by_order(group_terms), numeric_factors)
        for numeric_factors, group_terms in partitions)
Exemplo n.º 6
0
def leaves(node, path=None):
    """Return `(node, path)` pairs for every parameter node under `node`.

    A node is included when its `is_param` attribute is truthy. `path` is
    the list of child names followed from the starting node to reach it;
    the starting node itself gets the `path` that was passed in (an empty
    list by default).

    Parameters:
        node: object exposing `is_param`, `children` and, for children,
            `name`.
        path: names leading to `node`; defaults to a fresh empty list.

    Returns:
        The pair for `node` (if it is a parameter) followed by the joined
        results for each of its children.
    """
    # Use a None sentinel instead of a mutable default: the path list is
    # handed back to the caller inside the result, so a shared default
    # list could be mutated and leak into later calls.
    if path is None:
        path = []
    this = [(node, path)] if node.is_param else []
    rest = join(leaves(n, path + [n.name]) for n in node.children)
    return this + rest
Exemplo n.º 7
0
 def all_from_terms(terms):
     """Join the factor lists of every term in `terms`."""
     factor_lists = (list(t.factors) for t in terms)
     return join(factor_lists)