def test_label_binarizer_errors(): # Check that invalid arguments yield ValueError one_class = np.array([0, 0, 0, 0]) lb = LabelBinarizer().fit(one_class) multi_label = [(2, 3), (0, ), (0, 2)] with pytest.raises(ValueError): lb.transform(multi_label) lb = LabelBinarizer() with pytest.raises(ValueError): lb.transform([]) with pytest.raises(ValueError): lb.inverse_transform([]) with pytest.raises(ValueError): LabelBinarizer(neg_label=2, pos_label=1) with pytest.raises(ValueError): LabelBinarizer(neg_label=2, pos_label=2) with pytest.raises(ValueError): LabelBinarizer(neg_label=1, pos_label=2, sparse_output=True) # Fail on y_type with pytest.raises(ValueError): _inverse_binarize_thresholding(y=csr_matrix([[1, 2], [2, 1]]), output_type="foo", classes=[1, 2], threshold=0) # Sequence of seq type should raise ValueError y_seq_of_seqs = [[], [1, 2], [3], [0, 1, 3], [2]] with pytest.raises(ValueError): LabelBinarizer().fit_transform(y_seq_of_seqs) # Fail on the number of classes with pytest.raises(ValueError): _inverse_binarize_thresholding(y=csr_matrix([[1, 2], [2, 1]]), output_type="foo", classes=[1, 2, 3], threshold=0) # Fail on the dimension of 'binary' with pytest.raises(ValueError): _inverse_binarize_thresholding(y=np.array([[1, 2, 3], [2, 1, 3]]), output_type="binary", classes=[1, 2, 3], threshold=0) # Fail on multioutput data with pytest.raises(ValueError): LabelBinarizer().fit(np.array([[1, 3], [2, 1]])) with pytest.raises(ValueError): label_binarize(np.array([[1, 3], [2, 1]]), [1, 2, 3])
def check_binarized_results(y, classes, pos_label, neg_label, expected): for sparse_output in [True, False]: if ((pos_label == 0 or neg_label != 0) and sparse_output): assert_raises(ValueError, label_binarize, y, classes, neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output) continue # check label_binarize binarized = label_binarize(y, classes, neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output) assert_array_equal(toarray(binarized), expected) assert_equal(issparse(binarized), sparse_output) # check inverse y_type = type_of_target(y) if y_type == "multiclass": inversed = _inverse_binarize_multiclass(binarized, classes=classes) else: inversed = _inverse_binarize_thresholding( binarized, output_type=y_type, classes=classes, threshold=((neg_label + pos_label) / 2.)) assert_array_equal(toarray(inversed), toarray(y)) # Check label binarizer lb = LabelBinarizer(neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output) binarized = lb.fit_transform(y) assert_array_equal(toarray(binarized), expected) assert_equal(issparse(binarized), sparse_output) inverse_output = lb.inverse_transform(binarized) assert_array_equal(toarray(inverse_output), toarray(y)) assert_equal(issparse(inverse_output), issparse(y))
def check_binarized_results(y, classes, pos_label, neg_label, expected): for sparse_output in [True, False]: if ((pos_label == 0 or neg_label != 0) and sparse_output): assert_raises(ValueError, label_binarize, y, classes, neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output) continue # check label_binarize binarized = label_binarize(y, classes, neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output) assert_array_equal(toarray(binarized), expected) assert_equal(issparse(binarized), sparse_output) # check inverse y_type = type_of_target(y) if y_type == "multiclass": inversed = _inverse_binarize_multiclass(binarized, classes=classes) else: inversed = _inverse_binarize_thresholding(binarized, output_type=y_type, classes=classes, threshold=((neg_label + pos_label) / 2.)) assert_array_equal(toarray(inversed), toarray(y)) # Check label binarizer lb = LabelBinarizer(neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output) binarized = lb.fit_transform(y) assert_array_equal(toarray(binarized), expected) assert_equal(issparse(binarized), sparse_output) inverse_output = lb.inverse_transform(binarized) assert_array_equal(toarray(inverse_output), toarray(y)) assert_equal(issparse(inverse_output), issparse(y))