def test_DesignMatrixBuilder_subset():
    # Build one design matrix from the terms x, y, z, then request subsets
    # of the builder described in every supported way -- a formula string,
    # a list of term names, a list of term objects, and a mix of names and
    # objects. Each subset build must equal the matching columns of the
    # full matrix. Only the variables a subset needs are supplied, which
    # also checks that removed variables don't hurt.
    all_data = {
        "x": [1, 2],
        "y": [[3.1, 3.2], [4.1, 4.2]],
        "z": [5, 6],
    }
    all_terms = make_termlist("x", "y", "z")

    def data_iter_maker():
        yield all_data

    all_builder = design_matrix_builders([all_terms], data_iter_maker)[0]
    full_matrix = build_design_matrices([all_builder], all_data)[0]

    def check_subset(which_terms, variables, columns):
        # which_terms: spec handed to all_builder.subset()
        # variables:   names of the data entries passed to the subset build
        # columns:     index into full_matrix's columns giving the expected
        #              result
        subset_builder = all_builder.subset(which_terms)
        needed_data = {name: all_data[name] for name in variables}
        actual = build_design_matrices([subset_builder], needed_data)[0]
        expected = full_matrix[:, columns]
        # For a formula string len() would count characters, so the term
        # count is only compared for non-string specs.
        if not isinstance(which_terms, six.string_types):
            assert len(which_terms) == len(subset_builder.design_info.terms)
        assert np.array_equal(actual, expected)

    # All three terms, in the original order: the whole matrix.
    xyz_specs = ["~ 0 + x + y + z", ["x", "y", "z"]]
    if six.PY2:
        xyz_specs.append([unicode("x"), unicode("y"), unicode("z")])
    xyz_specs.append(all_terms)
    xyz_specs.append([all_terms[0], "y", all_terms[2]])
    for spec in xyz_specs:
        check_subset(spec, ["x", "y", "z"], slice(None))

    # x then z: first and last columns of the full matrix.
    xz_specs = ["~ 0 + x + z", ["x", "z"]]
    if six.PY2:
        xz_specs.append([unicode("x"), unicode("z")])
    xz_specs.append([all_terms[0], all_terms[2]])
    xz_specs.append([all_terms[0], "z"])
    for spec in xz_specs:
        check_subset(spec, ["x", "z"], [0, 3])

    # z then x: the same two columns, in swapped order.
    zx_specs = [
        "~ 0 + z + x",
        ["z", "x"],
        [six.text_type("z"), six.text_type("x")],
        [all_terms[2], all_terms[0]],
        [all_terms[2], "x"],
    ]
    for spec in zx_specs:
        check_subset(spec, ["x", "z"], [3, 0])

    # y alone: the two middle columns.
    y_specs = [
        "~ 0 + y",
        ["y"],
        [six.text_type("y")],
        [all_terms[1]],
    ]
    for spec in y_specs:
        check_subset(spec, ["y"], [1, 2])

    # Formula can't have a LHS
    assert_raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    assert_raises(PatsyError, all_builder.subset, "~ asdf")
    assert_raises(PatsyError, all_builder.subset, ["asdf"])
    assert_raises(PatsyError, all_builder.subset, [Term(["asdf"])])
def test_data_types():
    # The same data -- a string-valued column "a" and an integer column
    # "x" -- is supplied in several container types (plain dict, numpy
    # structured arrays, recarrays, and pandas DataFrames when pandas is
    # available), with both bytes and unicode strings. Every variant must
    # produce the same matrix from make_matrix.
    basic_dict = {"a": ["a1", "a2", "a1", "a2"], "x": [1, 2, 3, 4]}
    # On Python 2, this is identical to basic_dict:
    basic_dict_bytes = dict(basic_dict)
    basic_dict_bytes["a"] = [s.encode("ascii") for s in basic_dict["a"]]
    # On Python 3, this is identical to basic_dict:
    basic_dict_unicode = dict(basic_dict)
    basic_dict_unicode["a"] = [six.text_type(s) for s in basic_dict["a"]]

    # One (a, x) tuple per row; shared by both structured arrays, which
    # differ only in the string dtype of the "a" field.
    rows = list(zip(basic_dict["a"], basic_dict["x"]))
    structured_array_bytes = np.array(rows, dtype=[("a", "S2"), ("x", int)])
    structured_array_unicode = np.array(rows, dtype=[("a", "U2"), ("x", int)])
    recarray_bytes = structured_array_bytes.view(np.recarray)
    recarray_unicode = structured_array_unicode.view(np.recarray)

    datas = [
        basic_dict,
        structured_array_bytes,
        structured_array_unicode,
        recarray_bytes,
        recarray_unicode,
    ]
    if have_pandas:
        df_bytes = pandas.DataFrame(basic_dict_bytes)
        datas.append(df_bytes)
        df_unicode = pandas.DataFrame(basic_dict_unicode)
        datas.append(df_unicode)

    expected = [
        [1, 0, 1, 0],
        [0, 1, 0, 2],
        [1, 0, 3, 0],
        [0, 1, 0, 4],
    ]
    for data in datas:
        m = make_matrix(
            data,
            4,
            [["a"], ["a", "x"]],
            column_names=["a[a1]", "a[a2]", "a[a1]:x", "a[a2]:x"],
        )
        assert np.allclose(m, expected)