示例#1
0
    def select(self, label_index, unlabel_index, oracle=None, cost=None, budget=40):
        """Randomly select instance-label pairs until the budget is used up.

        Pairs are drawn uniformly at random, without replacement, from the
        unlabeled pool. The cost of each drawn pair's label is added to a
        running total; drawing stops as soon as the total exceeds ``budget``
        (the pair that overshoots is not returned) or the pool is exhausted.

        Parameters
        ----------
        label_index: ignore
            Not used by this method.

        unlabel_index: {list, np.ndarray, MultiLabelIndexCollection}
            The indexes of unlabeled samples. It should be a 1d array of indexes (column major, start from 0) or
            MultiLabelIndexCollection or a list of tuples with 2 elements, in which,
            the 1st element is the index of instance and the 2nd element is the index of labels.

        oracle: Oracle, optional (default=None)
            If given (and truthy), the per-label costs are taken from the second
            value returned by ``oracle.query_by_index(range(n_classes))`` and
            ``cost`` is not used for the selection.

        cost: array-like, optional (default=None), shape [1, n_classes] or [n_classes]
            The cost of querying each label. Used when no oracle is given.
            It is flattened before use, so both documented shapes are accepted.

        budget: int, optional (default=40)
            The maximum total cost of the selected pairs. If every label costs 1,
            this is simply the batch size.

        Returns
        -------
        selected_ins_lab_pair: list of tuple
            The selected instance-label pairs.

        Raises
        ------
        ValueError
            If neither a usable oracle nor ``cost`` is provided.
        AssertionError
            If ``cost`` is provided but does not hold exactly one value per label.
        """
        unlabel_index = self._check_multi_label_ind(unlabel_index)
        n_classes = unlabel_index._label_size

        # Validate `cost` only when it was actually passed: the original checked
        # len(cost) unconditionally and crashed with a TypeError for oracle-only calls.
        if cost is not None:
            # Flatten so that the documented [1, n_classes] shape is accepted too.
            cost = np.ravel(cost)
            assert len(cost) == n_classes, \
                'cost must contain exactly one value per label.'

        if oracle:
            _, costs = oracle.query_by_index(range(n_classes))
        elif cost is not None:
            costs = cost
        else:
            raise ValueError('There is no information about the cost of each label. '
                             'Please input Oracle or cost for the label at least.')

        instance_pair = MultiLabelIndexCollection(label_size=n_classes)
        # Work on a private copy so that the caller's pool is left untouched.
        un_ind = copy.deepcopy(unlabel_index)
        current_cost = 0.
        # Stop on an empty pool as well: np.random.choice(0) would raise when the
        # budget is large enough to pay for every unlabeled pair.
        while len(un_ind) > 0:
            rand = np.random.choice(len(un_ind))
            i_j = flattern_multilabel_index(un_ind.index)[rand]
            j_class = i_j[1]
            current_cost += costs[j_class]
            if current_cost > budget:
                # This pair would overshoot the budget; it is not selected.
                break
            instance_pair.update(i_j)
            un_ind.difference_update(i_j)
        return [tuple(pair) for pair in instance_pair]
示例#2
0
 def _check_feature_ind(self, container):
     """Return the given instance-feature indexes as a MultiLabelIndexCollection.

     Parameters
     ----------
     container: {list, np.ndarray, MultiLabelIndexCollection}
         A MultiLabelIndexCollection (returned as is, without copying),
         a list of tuples, or a 1d array of indexes. Tuples are passed to
         MultiLabelIndexCollection together with ``self.X.shape[1]``; anything
         else is passed to ``construct_by_1d_array`` with ``self.X.shape``.

     Returns
     -------
     container: MultiLabelIndexCollection
         The converted (or original) collection.

     Raises
     ------
     ValueError
         If the conversion fails for any reason (including an empty or
         non-indexable container).
     """
     if not isinstance(container, MultiLabelIndexCollection):
         try:
             if isinstance(container[0], tuple):
                 container = MultiLabelIndexCollection(
                     container, self.X.shape[1])
             else:
                 container = MultiLabelIndexCollection.construct_by_1d_array(
                     container, label_mat_shape=self.X.shape)
         # `except Exception` instead of a bare `except`, so that
         # KeyboardInterrupt/SystemExit are not turned into a ValueError.
         except Exception:
             raise ValueError(
                 "Please pass a 1d array of indexes or MultiLabelIndexCollection (column major, start from 0)"
                 "or a list of tuples with 2 elements, in which, the 1st element is the index of instance "
                 "and the 2nd element is the index of features.")
     return container
示例#3
0
 def _check_multi_label_ind(self, container):
     """Check if the given array is an array of multi label indexes.

     Parameters
     ----------
     container: {list, np.ndarray, MultiLabelIndexCollection}
         A MultiLabelIndexCollection, a list of tuples, or a 1d array of
         indexes. Tuples are passed to MultiLabelIndexCollection together
         with ``self.y.shape[1]``; anything else is passed to
         ``construct_by_1d_array`` with ``self.y.shape``.

     Returns
     -------
     container: MultiLabelIndexCollection
         A shallow copy (``copy.copy``) of the converted or original collection.

     Raises
     ------
     ValueError
         If the conversion fails for any reason (including an empty or
         non-indexable container).
     """
     if not isinstance(container, MultiLabelIndexCollection):
         try:
             if isinstance(container[0], tuple):
                 container = MultiLabelIndexCollection(
                     container, self.y.shape[1])
             else:
                 container = MultiLabelIndexCollection.construct_by_1d_array(
                     container, label_mat_shape=self.y.shape)
         # `except Exception` instead of a bare `except`, so that
         # KeyboardInterrupt/SystemExit are not turned into a ValueError.
         except Exception:
             raise ValueError(
                 "Please pass a 1d array of indexes or MultiLabelIndexCollection (column major, "
                 "start from 0) or a list "
                 "of tuples with 2 elements, in which, the 1st element is the index of instance "
                 "and the 2nd element is the index of label.")
     return copy.copy(container)
示例#4
0
# Usage example for the index containers.
# NOTE(review): `a` and `IndexCollection` are defined before this excerpt.
a_ind = IndexCollection(a)
# Add a single index; warns if the element is already present.
a_ind.add(4)
# Discard a single index; warns if the element does not exist.
a_ind.discard(4)
# Add a batch of indexes.
a_ind.update([4, 5])
# Discard a batch of indexes.
a_ind.difference_update([1, 2])
print(a_ind)

# ---------MultiLabelIndexCollection-------------
from alipy.index import MultiLabelIndexCollection
# Each entry is an (instance, label) tuple; the label part may itself be a
# tuple of several labels, e.g. (0, (3, 4)).
multi_lab_ind1 = MultiLabelIndexCollection([(0, 1), (0, 2), (0, (3, 4)),
                                            (1, (0, 1))],
                                           label_size=5)
# update() is called with a single tuple here and with a list of tuples below.
multi_lab_ind1.update((0, 0))
multi_lab_ind1.update([(1, 2), (1, (3, 4))])
# A 1-element tuple names only the instance — presumably it stands for all
# labels of that instance; verify against MultiLabelIndexCollection.
multi_lab_ind1.update([(2, )])
multi_lab_ind1.difference_update([(0, )])
print(multi_lab_ind1)

# Matlab-style 1d indexes are supported as well: build the collection from
# flat indexes into a label matrix of shape (3, 4).
b = [1, 4, 11]
mi = MultiLabelIndexCollection.construct_by_1d_array(array=b,
                                                     label_mat_shape=(3, 4))
print(mi)
# Convert back to 1d indexes in column-major ('F') and row-major ('C') order.
print('col major:', mi.get_onedim_index(order='F', ins_num=3))
print('row major:', mi.get_onedim_index(order='C'))
示例#5
0
def _to_multilabel_index_collection(index, y_true):
    """Return ``index`` as a MultiLabelIndexCollection sized after ``y_true``.

    An existing MultiLabelIndexCollection is returned unchanged (same object);
    anything else is converted and the result is deep-copied.
    """
    if isinstance(index, MultiLabelIndexCollection):
        return index
    try:
        if isinstance(index[0], tuple):
            container = MultiLabelIndexCollection(index, np.shape(y_true)[1])
        else:
            container = MultiLabelIndexCollection.construct_by_1d_array(
                index, label_mat_shape=np.shape(y_true))
    # `except Exception` instead of a bare `except`, so that
    # KeyboardInterrupt/SystemExit are not turned into a ValueError.
    except Exception:
        raise ValueError(
            "Please pass a 1d array of indexes or MultiLabelIndexCollection (column major, "
            "start from 0) or a list "
            "of tuples with 2 elements, in which, the 1st element is the index of instance "
            "and the 2nd element is the index of label.")
    return copy.deepcopy(container)


def hierarchical_multilabel_mark(multilabel_index, label_index, label_tree, y_true):
    """Complete instance-label information according to hierarchy in the label-tree.

    For every pair ``(i, j)`` in ``multilabel_index``:

    * if ``y_true[i, j] == 1``, every pair ``(i, d)`` with ``label_tree[j][d] == 1``
      is added, unless it is already in ``label_index``;
    * if ``y_true[i, j] == -1``, every pair ``(i, p)`` with ``label_tree[p][j] == 1``
      is added, unless it is already in ``label_index``.

    Parameters
    ----------
    multilabel_index: {list, np.ndarray, MultiLabelIndexCollection}
        The indexes of the instance-label pairs to be completed. It should be a 1d array of
        indexes (column major, start from 0) or MultiLabelIndexCollection or a list of tuples
        with 2 elements, in which, the 1st element is the index of instance and the 2nd
        element is the index of labels.
        Note: a MultiLabelIndexCollection passed here is updated in place.

    label_index: {list, np.ndarray, MultiLabelIndexCollection}
        The indexes of labeled samples. It should be a 1d array of indexes (column major, start from 0) or
        MultiLabelIndexCollection or a list of tuples with 2 elements, in which,
        the 1st element is the index of instance and the 2nd element is the index of labels.

    label_tree: np.ndarray
        The hierarchical relationships among the labels, shape [n_classes, n_classes].
        if node_i is the parent of node_j , then label_tree(i,j)=1

    y_true: 2D array
        Label matrix of the whole dataset, shape [n_samples, n_classes].
        It is indexed with ``(instance, label)`` tuples.

    Returns
    -------
    multilabel_index: MultiLabelIndexCollection
        The given pairs together with the pairs added through the hierarchy.

    Raises
    ------
    ValueError
        If ``multilabel_index`` or ``label_index`` cannot be converted.
    AssertionError
        If ``label_tree`` is not of shape [n_classes, n_classes].
    """
    # try to convert the indexes
    multilabel_index = _to_multilabel_index_collection(multilabel_index, y_true)
    label_index = _to_multilabel_index_collection(label_index, y_true)

    n_classes = multilabel_index._label_size
    assert(np.shape(label_tree)[0] == n_classes and np.shape(label_tree)[1] == n_classes)

    # Collect the new pairs separately so that multilabel_index is not
    # modified while it is being iterated.
    add_label_index = MultiLabelIndexCollection(label_size=n_classes)

    # NOTE(review): a positive label marks the labels below it and a negative
    # one the labels above it — confirm this matches the intended hierarchy rule.
    for instance_label_pair in multilabel_index:
        i_instance = instance_label_pair[0]
        j_label = instance_label_pair[1]
        label_value = y_true[instance_label_pair]
        if label_value == 1:
            for descent_label in range(n_classes):
                if label_tree[j_label][descent_label] == 1:
                    if (i_instance, descent_label) not in label_index:
                        add_label_index.update((i_instance, descent_label))
        elif label_value == -1:
            for parent_label in range(n_classes):
                if label_tree[parent_label][j_label] == 1:
                    if (i_instance, parent_label) not in label_index:
                        add_label_index.update((i_instance, parent_label))

    for new_pair in add_label_index:
        if new_pair not in multilabel_index:
            multilabel_index.update(new_pair)
    return multilabel_index
示例#6
0
# Use the default Logistic Regression classifier
model = LogisticRegression()

# Stopping criterion: 'num_of_queries' with a value of 50,
# i.e. the query budget is 50 queries.
stopping_criterion = StoppingCriteria('num_of_queries', 50)

# Result holders, one list per compared strategy
# (presumably filled by the experiment loops that follow — TODO confirm).
AFASMC_result = []
rand_result = []
Stable_result = []

# AFASMC
for i in range(5):
    train_idx = tr[i]
    test_idx = te[i]
    label_ind = MultiLabelIndexCollection(lab[i], label_size=X.shape[1])
    unlab_ind = MultiLabelIndexCollection(unlab[i], label_size=X.shape[1])
    saver = StateIO(i, train_idx, test_idx, label_ind, unlab_ind)
    strategy = QueryFeatureAFASMC(X=X, y=y, train_idx=train_idx)

    while not stopping_criterion.is_stop():
        # query
        selected_feature = strategy.select(observed_entries=label_ind,
                                           unkonwn_entries=unlab_ind)

        # update index
        label_ind.update(selected_feature)
        unlab_ind.difference_update(selected_feature)

        # train/test
        lab_in_train = map_whole_index_to_train(train_idx, label_ind)