Пример #1
0
def test_DesignMatrixBuilder_subset():
    """Check that ``subset()`` on a builder matches slicing the full matrix.

    For each way of specifying the terms to keep (formula string, term
    names, term objects, and a mix of names and objects), a sub-builder is
    created, a matrix is built from it using only the variables it needs,
    and the result is compared against the corresponding columns of the
    matrix built from all terms.  Supplying only the needed variables also
    checks that dropped variables are not required by the sub-builder.
    """
    # "y" is two-dimensional, so in the calls below it occupies columns 1-2
    # of the full matrix; "x" is column 0 and "z" is column 3.
    all_data = {"x": [1, 2], "y": [[3.1, 3.2], [4.1, 4.2]], "z": [5, 6]}
    all_terms = make_termlist("x", "y", "z")

    def iter_maker():
        yield all_data

    all_builder = design_matrix_builders([all_terms], iter_maker)[0]
    full_matrix = build_design_matrices([all_builder], all_data)[0]

    def t(which_terms, variables, columns):
        """Build the subset given by *which_terms* and compare to the full build.

        which_terms -- formula string or list of term names / term objects
                       passed straight to ``all_builder.subset``.
        variables   -- names of the entries of ``all_data`` made available
                       when building the subset matrix.
        columns     -- index (slice or list) selecting the expected columns
                       of ``full_matrix``.
        """
        sub_builder = all_builder.subset(which_terms)
        # Deliberately pass only the listed variables: the subset must not
        # need data for terms that were dropped.
        sub_data = {variable: all_data[variable] for variable in variables}
        sub_matrix = build_design_matrices([sub_builder], sub_data)[0]
        sub_full_matrix = full_matrix[:, columns]
        # A formula string has no meaningful len() in terms; only check the
        # term count when an explicit list of terms was given.
        if not isinstance(which_terms, six.string_types):
            assert len(which_terms) == len(sub_builder.design_info.terms)
        assert np.array_equal(sub_matrix, sub_full_matrix)

    # All terms, original order.
    t("~ 0 + x + y + z", ["x", "y", "z"], slice(None))
    t(["x", "y", "z"], ["x", "y", "z"], slice(None))
    # six.text_type is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # this exercises unicode term names without referencing the Python-2-only
    # ``unicode`` builtin (and matches the text_type cases further down).
    t(
        [six.text_type("x"), six.text_type("y"), six.text_type("z")],
        ["x", "y", "z"],
        slice(None),
    )
    t(all_terms, ["x", "y", "z"], slice(None))
    t([all_terms[0], "y", all_terms[2]], ["x", "y", "z"], slice(None))

    # Drop the middle term.
    t("~ 0 + x + z", ["x", "z"], [0, 3])
    t(["x", "z"], ["x", "z"], [0, 3])
    t([six.text_type("x"), six.text_type("z")], ["x", "z"], [0, 3])
    t([all_terms[0], all_terms[2]], ["x", "z"], [0, 3])
    t([all_terms[0], "z"], ["x", "z"], [0, 3])

    # Same terms, reversed order: columns must follow the requested order.
    t("~ 0 + z + x", ["x", "z"], [3, 0])
    t(["z", "x"], ["x", "z"], [3, 0])
    t([six.text_type("z"), six.text_type("x")], ["x", "z"], [3, 0])
    t([all_terms[2], all_terms[0]], ["x", "z"], [3, 0])
    t([all_terms[2], "x"], ["x", "z"], [3, 0])

    # Only the two-column term.
    t("~ 0 + y", ["y"], [1, 2])
    t(["y"], ["y"], [1, 2])
    t([six.text_type("y")], ["y"], [1, 2])
    t([all_terms[1]], ["y"], [1, 2])

    # Formula can't have a LHS
    assert_raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    assert_raises(PatsyError, all_builder.subset, "~ asdf")
    assert_raises(PatsyError, all_builder.subset, ["asdf"])
    assert_raises(PatsyError, all_builder.subset, [Term(["asdf"])])
Пример #2
0
def test_data_types():
    """Check that the same data in different containers builds the same matrix.

    The same four rows (a string column "a" and an integer column "x") are
    presented as a plain dict, as NumPy structured arrays and recarrays with
    bytes ("S2") and unicode ("U2") string fields, and -- when pandas is
    available -- as DataFrames built from bytes and unicode string lists.
    Every variant must yield the same columns and values from
    ``make_matrix``.
    """
    basic_dict = {"a": ["a1", "a2", "a1", "a2"], "x": [1, 2, 3, 4]}
    # On Python 2, this is identical to basic_dict:
    basic_dict_bytes = dict(basic_dict)
    basic_dict_bytes["a"] = [s.encode("ascii") for s in basic_dict_bytes["a"]]
    # On Python 3, this is identical to basic_dict:
    basic_dict_unicode = dict(basic_dict)
    basic_dict_unicode["a"] = [six.text_type(s) for s in basic_dict_unicode["a"]]

    # Row tuples shared by both structured arrays; only the string dtype of
    # field "a" differs between them.
    rows = list(zip(basic_dict["a"], basic_dict["x"]))
    structured_array_bytes = np.array(rows, dtype=[("a", "S2"), ("x", int)])
    structured_array_unicode = np.array(rows, dtype=[("a", "U2"), ("x", int)])
    recarray_bytes = structured_array_bytes.view(np.recarray)
    recarray_unicode = structured_array_unicode.view(np.recarray)

    datas = [
        basic_dict,
        structured_array_bytes,
        structured_array_unicode,
        recarray_bytes,
        recarray_unicode,
    ]
    if have_pandas:
        # The bytes/unicode dict variants are only exercised through pandas.
        datas.append(pandas.DataFrame(basic_dict_bytes))
        datas.append(pandas.DataFrame(basic_dict_unicode))

    expected_columns = ["a[a1]", "a[a2]", "a[a1]:x", "a[a2]:x"]
    # One indicator column per level of "a", followed by the same indicators
    # multiplied by "x".
    expected_values = [
        [1, 0, 1, 0],
        [0, 1, 0, 2],
        [1, 0, 3, 0],
        [0, 1, 0, 4],
    ]
    for data in datas:
        # NOTE(review): the literal 4 is presumably the expected rank of the
        # matrix -- confirm against make_matrix's signature.
        m = make_matrix(data, 4, [["a"], ["a", "x"]], column_names=expected_columns)
        assert np.allclose(m, expected_values)