Пример #1
0
def test_split2_allclass():
    """Check ``split_multi_label`` when called with ``all_class=True``.

    For every split round the test verifies that:

    * the labeled and unlabeled multi-label indexes pass
      ``check_index_multilabel``;
    * the test partition holds ``round(test_ratio * instance_num)`` instances;
    * the labeled instances number ``round(initial_label_rate * len(train))``;
    * train and test together cover every instance index;
    * labeled and unlabeled instances together equal the training set.
    """
    test_ratio = 0.3
    initial_label_rate = 0.05

    train_idx, test_idx, label_idx, unlabel_idx = split_multi_label(
        y=mult_y,
        label_shape=(instance_num, label_num),
        all_class=True,
        split_count=split_count,
        test_ratio=test_ratio,
        initial_label_rate=initial_label_rate,
        saving_path=None,
    )

    # Every returned collection must hold one entry per split round.
    for per_round in (train_idx, test_idx, label_idx, unlabel_idx):
        assert len(per_round) == split_count

    for rnd in range(split_count):
        labeled = label_idx[rnd]
        unlabeled = unlabel_idx[rnd]

        check_index_multilabel(labeled)
        check_index_multilabel(unlabeled)
        train_set = set(train_idx[rnd])
        test_set = set(test_idx[rnd])

        # Integrating the labeled index must not change its number of entries
        # (presumably: no two labeled entries refer to the same instance --
        # confirm against integrate_multilabel_index).
        integrated = integrate_multilabel_index(labeled, label_size=label_num)
        assert len(labeled) == len(integrated)

        # Instance ids are the first element of each multi-label index entry.
        labeled_instances = {entry[0] for entry in labeled}
        unlabeled_instances = {entry[0] for entry in unlabeled}
        all_instances = train_set | test_set
        all_train_instances = labeled_instances | unlabeled_instances

        # Partition sizes follow the requested ratios.
        assert len(test_set) == round(test_ratio * instance_num)
        assert len(labeled_instances) == round(initial_label_rate * len(train_set))
        # Partitions are exhaustive at both levels.
        assert all_instances == set(range(instance_num))
        assert all_train_instances == train_set
Пример #2
0
    def _get_cond_instance(self, cond):
        """Return instance indexes selected by ``cond``.

        The stored index is first passed through
        ``integrate_multilabel_index``; the first element of each resulting
        entry is returned when the entry matches the condition.

        Parameters
        ----------
        cond: int
            0 selects the unbroken instances (integrated entries of length 1).
            Any other value selects the instances which have missing entries
            (integrated entries of length greater than 1).

        Returns
        -------
        list
            First elements of the matching integrated entries.
        """
        merged = integrate_multilabel_index(
            self.index,
            label_size=self._label_size,
            check_arr=False,
        )

        # Choose the length predicate once, then filter in a single pass.
        if cond == 0:
            def matches(entry):
                return len(entry) == 1
        else:
            def matches(entry):
                return len(entry) > 1

        return [entry[0] for entry in merged if matches(entry)]
Пример #3
0
def test_split3_all_features():
    """Check ``split_features`` when called with ``all_features=True``.

    For every split round the test verifies that:

    * the flattened unlabeled index holds
      ``round(missing_rate * len(train) * X.shape[1])`` entries;
    * the test partition holds ``round(test_ratio * instance_num)`` instances;
    * train and test together cover every instance index;
    * integrating the labeled and unlabeled indexes together yields one entry
      per training instance.
    """
    test_ratio = 0.3
    missing_rate = 0.2

    train_idx, test_idx, label_idx, unlabel_idx = split_features(
        feature_matrix=X,
        feature_matrix_shape=X.shape,
        test_ratio=test_ratio,
        missing_rate=missing_rate,
        split_count=split_count,
        all_features=True,
        saving_path=None,
    )

    # Every returned collection must hold one entry per split round.
    for per_round in (train_idx, test_idx, label_idx, unlabel_idx):
        assert len(per_round) == split_count

    for rnd in range(split_count):
        train_set = set(train_idx[rnd])
        test_set = set(test_idx[rnd])
        all_instances = train_set | test_set

        # Size checks: missing entries first, then the test partition.
        missing_entries = flattern_multilabel_index(
            index_arr=unlabel_idx[rnd], label_size=feature_num
        )
        expected_missing = round(missing_rate * len(train_set) * X.shape[1])
        assert len(missing_entries) == expected_missing
        assert len(test_set) == round(test_ratio * instance_num)

        assert all_instances == set(range(instance_num))

        # Known and missing entries combined should integrate to exactly one
        # entry per training instance.
        combined = integrate_multilabel_index(
            label_idx[rnd] + unlabel_idx[rnd], label_size=feature_num
        )
        covered_instances = [entry[0] for entry in combined]
        assert len(covered_instances) == len(train_idx[rnd])