def test_label_binarizer_set_label_encoding():
    """Check LabelBinarizer output when custom neg/pos label values are set.

    Two scenarios are exercised, each followed by a round trip through
    ``inverse_transform``:

    * a two-class input encoded with ``neg_label=-2`` and ``pos_label=0``;
    * a four-class input encoded with ``neg_label=-2`` and ``pos_label=2``.
    """
    # Two-class input: a single output column, 0 marks the positive class.
    binary_binarizer = LabelBinarizer(neg_label=-2, pos_label=0)
    binary_labels = np.array([0, 1, 1, 0])
    binary_expected = np.array([-2, 0, 0, -2]).reshape(-1, 1)

    binary_encoded = binary_binarizer.fit_transform(binary_labels)
    assert_array_equal(binary_expected, binary_encoded)
    assert_array_equal(
        binary_binarizer.inverse_transform(binary_encoded), binary_labels
    )

    # Multi-class input: one column per class (0, 1, 2, 3), with +2 in the
    # column of the sample's class and -2 everywhere else.
    multi_binarizer = LabelBinarizer(neg_label=-2, pos_label=2)
    multi_labels = np.array([3, 2, 1, 2, 0])
    multi_expected = np.array(
        [
            [-2, -2, -2, 2],
            [-2, -2, 2, -2],
            [-2, 2, -2, -2],
            [-2, -2, 2, -2],
            [2, -2, -2, -2],
        ]
    )

    multi_encoded = multi_binarizer.fit_transform(multi_labels)
    assert_array_equal(multi_expected, multi_encoded)
    assert_array_equal(
        multi_binarizer.inverse_transform(multi_encoded), multi_labels
    )
def test_label_binarizer():
    """Exercise LabelBinarizer on one-, two- and multi-class string labels.

    For each case the fitted ``classes_``, the binarized output and the
    result of ``inverse_transform`` are compared against hand-written values.
    """
    # ---- One class only: every sample is encoded with the negative label.
    single_class = ["pos", "pos", "pos", "pos"]
    all_negative = np.array([[0, 0, 0, 0]]).T

    # Dense output.
    dense_binarizer = LabelBinarizer(sparse_output=False)
    dense_encoded = dense_binarizer.fit_transform(single_class)
    assert_array_equal(dense_binarizer.classes_, ["pos"])
    assert_array_equal(all_negative, dense_encoded)
    assert_array_equal(
        dense_binarizer.inverse_transform(dense_encoded), single_class
    )

    # Sparse output: same expectation once densified.
    sparse_binarizer = LabelBinarizer(sparse_output=True)
    sparse_encoded = sparse_binarizer.fit_transform(single_class)
    assert issparse(sparse_encoded)
    assert_array_equal(sparse_binarizer.classes_, ["pos"])
    assert_array_equal(all_negative, sparse_encoded.toarray())
    assert_array_equal(
        sparse_binarizer.inverse_transform(sparse_encoded.toarray()),
        single_class,
    )

    # The same dense binarizer instance is fitted twice below (two-class,
    # then multi-class), exactly as a caller re-using an estimator would.
    binarizer = LabelBinarizer(sparse_output=False)

    # ---- Two classes: a single column, 1 for "pos" and 0 for "neg".
    two_class = ["neg", "pos", "pos", "neg"]
    two_class_expected = np.array([[0, 1, 1, 0]]).T
    two_class_encoded = binarizer.fit_transform(two_class)
    assert_array_equal(binarizer.classes_, ["neg", "pos"])
    assert_array_equal(two_class_expected, two_class_encoded)

    # A two-column one-hot matrix must also invert to the original labels.
    one_hot = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    assert_array_equal(binarizer.inverse_transform(one_hot), two_class)

    # ---- Multi-class: columns follow the sorted classes_ order.
    multi_class = ["spam", "ham", "eggs", "ham", "0"]
    multi_class_expected = np.array(
        [
            [0, 0, 0, 1],
            [0, 0, 1, 0],
            [0, 1, 0, 0],
            [0, 0, 1, 0],
            [1, 0, 0, 0],
        ]
    )
    multi_class_encoded = binarizer.fit_transform(multi_class)
    assert_array_equal(binarizer.classes_, ['0', 'eggs', 'ham', 'spam'])
    assert_array_equal(multi_class_expected, multi_class_encoded)
    assert_array_equal(
        binarizer.inverse_transform(multi_class_encoded), multi_class
    )
def test_label_binarizer_unseen_labels():
    """Check that labels not seen during fit are encoded as all-zero rows.

    NOTE(review): another function with this exact name appears later in
    this file; in Python the later definition replaces the earlier one, so
    only one of them would be collected -- confirm and deduplicate.
    """
    binarizer = LabelBinarizer()

    # Fitting on three labels yields one indicator column per label.
    fitted_output = binarizer.fit_transform(['b', 'd', 'e'])
    identity_rows = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    assert_array_equal(identity_rows, fitted_output)

    # 'a', 'c' and 'f' were not part of the fit data: their rows are zeros,
    # while 'b', 'd' and 'e' keep their indicator rows.
    transformed_output = binarizer.transform(['a', 'b', 'c', 'd', 'e', 'f'])
    rows_with_unseen = np.array(
        [
            [0, 0, 0],
            [1, 0, 0],
            [0, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [0, 0, 0],
        ]
    )
    assert_array_equal(rows_with_unseen, transformed_output)
def test_label_binarizer_unseen_labels():
    """Verify that transform maps labels unknown at fit time to zero rows.

    NOTE(review): a function with this exact name is also defined earlier in
    this file; this later definition shadows it -- confirm and deduplicate.
    """
    known_labels = ["b", "d", "e"]
    queried_labels = ["a", "b", "c", "d", "e", "f"]

    lb = LabelBinarizer()

    # Each known label gets its own indicator column.
    expected_fit = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    assert_array_equal(expected_fit, lb.fit_transform(known_labels))

    # Rows for "a", "c" and "f" (absent from the fit data) are all zeros.
    expected_transform = np.array(
        [
            [0, 0, 0],
            [1, 0, 0],
            [0, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [0, 0, 0],
        ]
    )
    assert_array_equal(expected_transform, lb.transform(queried_labels))
def check_binarized_results(y, classes, pos_label, neg_label, expected):
    """Assert that binarizing ``y`` gives ``expected`` and can be inverted.

    The check runs once with ``sparse_output=True`` and once with
    ``sparse_output=False``, covering both the ``label_binarize`` function
    and the ``LabelBinarizer`` estimator.

    Parameters
    ----------
    y : target values passed to the binarizers.
    classes : class labels forwarded to ``label_binarize`` and to the
        inverse helpers.
    pos_label : value used to encode the positive label.
    neg_label : value used to encode the negative label.
    expected : dense array the binarized output must equal.
    """
    for sparse_output in (True, False):
        binarize_kwargs = {
            "classes": classes,
            "neg_label": neg_label,
            "pos_label": pos_label,
            "sparse_output": sparse_output,
        }

        # This label combination is expected to be rejected with a
        # ValueError when sparse output is requested; nothing else is
        # checked for that configuration.
        if (pos_label == 0 or neg_label != 0) and sparse_output:
            with pytest.raises(ValueError):
                label_binarize(y, **binarize_kwargs)
            continue

        # Functional API: output values and container type.
        func_output = label_binarize(y, **binarize_kwargs)
        assert_array_equal(toarray(func_output), expected)
        assert issparse(func_output) == sparse_output

        # Invert with the private helper matching the target type.
        target_type = type_of_target(y)
        if target_type != "multiclass":
            recovered = _inverse_binarize_thresholding(
                func_output,
                output_type=target_type,
                classes=classes,
                # Midpoint between the two encodings.
                threshold=((neg_label + pos_label) / 2.0),
            )
        else:
            recovered = _inverse_binarize_multiclass(func_output, classes=classes)
        assert_array_equal(toarray(recovered), toarray(y))

        # Estimator API: same expectations, plus the inverse must keep the
        # sparsity of the original ``y``.
        estimator = LabelBinarizer(
            neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output
        )
        est_output = estimator.fit_transform(y)
        assert_array_equal(toarray(est_output), expected)
        assert issparse(est_output) == sparse_output

        round_trip = estimator.inverse_transform(est_output)
        assert_array_equal(toarray(round_trip), toarray(y))
        assert issparse(round_trip) == issparse(y)