def test_check_parameter_range(self):
    """Exercise check_parameter's type validation and range checking.

    Covers non-numeric input, missing or inconsistent bounds, one-sided
    bounds, values inside and outside the range, and the include_left /
    include_right switches.
    """

    def rejects(*args, **kwargs):
        # the call is expected to raise ValueError
        with assert_raises(ValueError):
            check_parameter(*args, **kwargs)

    def accepts(*args, **kwargs):
        # the call is expected to return True
        assert_equal(True, check_parameter(*args, **kwargs))

    # inputs of the wrong type raise TypeError
    with assert_raises(TypeError):
        check_parameter('f', 0, 100)

    with assert_raises(TypeError):
        check_parameter(argmaxn(value_list=[1, 2, 3], n=1), 0, 100)

    # neither low nor high supplied
    rejects(50)

    # low must be strictly below high
    rejects(50, 100, 99)
    rejects(50, 100, 100)

    # only one bound supplied
    rejects(50, low=100)
    rejects(50, high=0)
    accepts(50, low=10)
    accepts(50, high=100)

    # value falls outside the range
    rejects(-1, 0, 100)
    rejects(101, 0, 100)
    rejects(0.5, 0.2, 0.3)

    # value falls inside the range
    accepts(50, 0, 100)
    accepts(0.5, 0.1, 0.8)

    # value sits exactly on a bound
    rejects(100, 0, 100, include_left=False, include_right=False)
    accepts(0, 0, 100, include_left=True, include_right=False)
    accepts(0, 0, 100, include_left=True, include_right=True)
    accepts(100, 0, 100, include_left=False, include_right=True)
    accepts(100, 0, 100, include_left=True, include_right=True)
def test_argmaxn(self):
    """Check argmaxn's result length, selected indices and validation of n."""
    # requesting three indices yields exactly three
    default_inds = argmaxn(self.value_lists, 3)
    assert_equal(len(default_inds), 3)

    # default ordering: the index sum must match that of [4, 6, 9]
    default_inds = argmaxn(self.value_lists, 3)
    assert_equal(np.sum(default_inds), np.sum([4, 6, 9]))

    # order='asc': the index sum must match that of [3, 7, 8]
    asc_inds = argmaxn(self.value_lists, 3, order='asc')
    assert_equal(np.sum(asc_inds), np.sum([3, 7, 8]))

    # n values that must be rejected with ValueError
    for invalid_n in (-1, 20):
        with assert_raises(ValueError):
            argmaxn(self.value_lists, invalid_n)
def _get_competent_detectors(self, scores):
    """Identifies competent base detectors based on correlation scores

    A histogram of the scores is built with ``self.n_bins`` bins, the
    ``self.n_selected`` bins picked by ``argmaxn`` are taken, and the
    indices of all scores falling inside those bins are returned.

    Parameters
    ----------
    scores : numpy array, shape (n_clf,)
        Correlation scores for each classifier (for a specific
        test instance)

    Returns
    -------
    candidates : List
        Indices for competent detectors (for given test instance).
        Every index appears at most once.
    """
    # create histogram of correlation scores
    scores = scores.reshape(-1, 1)

    # TODO: handle when Pearson score is 0
    # if scores contain nan, change it to 0
    if np.isnan(scores).any():
        scores = np.nan_to_num(scores)

    # NOTE: this permanently lowers self.n_bins on the instance
    if self.n_bins > self.n_clf:
        warnings.warn(
            "The number of histogram bins is greater than the number of "
            "classifiers, reducing n_bins to n_clf.")
        self.n_bins = self.n_clf
    hist, bin_edges = np.histogram(scores, bins=self.n_bins)

    # find n_selected largest bins
    max_bins = argmaxn(hist, n=self.n_selected)

    candidates = []
    seen = set()

    # iterate through bins
    for max_bin in max_bins:
        # determine which detectors are inside this bin; both edges are
        # inclusive, so a score lying exactly on the edge shared by two
        # selected bins matches both of them
        selected = np.where((scores >= bin_edges[max_bin])
                            & (scores <= bin_edges[max_bin + 1]))

        # add to list of candidates, skipping detectors already collected
        # from a neighbouring bin so no index is returned twice
        for detector_ind in selected[0].tolist():
            if detector_ind not in seen:
                seen.add(detector_ind)
                candidates.append(detector_ind)

    return candidates
def _get_competent_detectors(self, scores):
    """Select the most competent detectors from their scores.

    A histogram of the scores is built with ``self.n_bins`` bins, the
    ``self.n_selected`` bins picked by ``argmaxn`` are taken, and the
    indices of all scores falling inside those bins are returned.

    :param scores: numpy array of per-detector scores; it is reshaped
        to a single column before binning
    :return: list of detector indices inside the selected bins, each
        index appearing at most once
    """
    scores = scores.reshape(-1, 1)
    hist, bin_edges = np.histogram(scores, bins=self.n_bins)

    # bins chosen by argmaxn out of the histogram counts
    max_bins = argmaxn(hist, n=self.n_selected)

    candidates = []
    seen = set()
    for max_bin in max_bins:
        # both edges are inclusive, so a score lying exactly on the edge
        # shared by two selected bins matches both of them
        selected = np.where((scores >= bin_edges[max_bin])
                            & (scores <= bin_edges[max_bin + 1]))

        # skip detectors already collected from a neighbouring bin so no
        # index is returned twice
        for detector_ind in selected[0].tolist():
            if detector_ind not in seen:
                seen.add(detector_ind)
                candidates.append(detector_ind)

    return candidates
def _get_competent_detectors(self, scores):
    """Identifies competent base detectors based on correlation scores

    A histogram of the scores is built with ``self.n_bins`` bins, the
    ``self.n_selected`` bins picked by ``argmaxn`` are taken, and the
    indices of all scores falling inside those bins are returned.

    Parameters
    ----------
    scores : numpy array, shape (n_clf,)
        Correlation scores for each classifier (for a specific
        test instance)

    Returns
    -------
    candidates : List
        Indices for competent detectors (for given test instance).
        Every index appears at most once.
    """
    # create histogram of correlation scores
    scores = scores.reshape(-1, 1)

    # NOTE: this permanently lowers self.n_bins on the instance
    if self.n_bins > self.n_clf:
        warnings.warn("Number of histogram bins greater than number of "
                      "classifiers, reducing n_bins to n_clf.")
        self.n_bins = self.n_clf
    hist, bin_edges = np.histogram(scores, bins=self.n_bins)

    # find n_selected largest bins
    max_bins = argmaxn(hist, n=self.n_selected)

    candidates = []
    seen = set()

    # iterate through bins
    for max_bin in max_bins:
        # determine which detectors are inside this bin; both edges are
        # inclusive, so a score lying exactly on the edge shared by two
        # selected bins matches both of them
        selected = np.where((scores >= bin_edges[max_bin])
                            & (scores <= bin_edges[max_bin + 1]))

        # add to list of candidates, skipping detectors already collected
        # from a neighbouring bin so no index is returned twice
        for detector_ind in selected[0].tolist():
            if detector_ind not in seen:
                seen.add(detector_ind)
                candidates.append(detector_ind)

    return candidates
U = EMF.user_vecs V = EMF.item_vecs bias_global = EMF.global_bias bias_user = EMF.user_bias bias_item = EMF.item_bias # print(EMF.regr_multirf.predict(test_meta).shape) predicted_scores = EMF.predict_new(test_meta_transformed) predicted_scores_max = np.nanargmax(predicted_scores, axis=1) for i in range(len(test_index)): roc_comp_mat[i, 10] = test_set[i, predicted_scores_max[i]] header_mat[i, 10] = config_headers[predicted_scores_max[i]] # get the top 3 index temp_index = argmaxn(predicted_scores[i, ], n=3) roc_comp_mat[i, 18] = test_set[i, temp_index].mean() header_mat[i, 18] = config_headers[temp_index[0]] + '|' + config_headers[ temp_index[1]] + '|' + config_headers[temp_index[2]] #################################### ISAC clustering = KMeans(n_clusters=5) clustering.fit(train_meta_transformed) train_clusters = clustering.labels_ predicted_clusters = clustering.predict(test_meta_transformed) for i in range(len(test_index)): train_data_index = np.where(train_clusters == predicted_clusters[i])[0] train_data_performance = train_set[train_data_index, :] train_data_performance_individual = np.nanargmax(
def _predict_internal(self, X, predict_proba):
    """Shared implementation behind predict and predict_proba.

    For every row of X, the neighbors returned by ``self.tree_.query`` are
    used as a local region. Each base estimator is rated with
    ``accuracy_score`` of its stored predictions
    (``self.y_train_predicted_``) against ``self.y_train_`` on that region,
    the ``self.n_selected_clfs`` estimators picked by ``argmaxn`` are
    queried on the sample, and their outputs are combined.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    predict_proba : bool
        if True, return the result of predict_proba

    Returns
    -------
    y_predicted : numpy array
        If ``predict_proba`` is True, ``score_to_proba`` applied to the
        (n_samples, self._classes) array of averaged estimator
        probabilities; otherwise the (n_samples,) array of
        ``majority_vote`` results.
    """
    check_is_fitted(self, ['fitted_'])
    X = check_array(X)
    n_samples = X.shape[0]

    # Find neighbors for all test instances
    _, neighbor_inds = self.tree_.query(X, k=self.local_region_size)

    if predict_proba:
        output_shape = [n_samples, self._classes]
    else:
        output_shape = [n_samples, ]
    y_predicted = np.zeros(output_shape)

    for i in range(n_samples):
        test_sample = X[i, :].reshape(1, -1)
        train_inds = neighbor_inds[i, :]

        # ground truth of the local region
        y_train_sample = self.y_train_[train_inds]

        # accuracy of every base estimator on the local region
        clf_performance = np.zeros([self.n_base_estimators_, ])
        for j, _ in enumerate(self.base_estimators):
            clf_performance[j] = accuracy_score(
                y_train_sample, self.y_train_predicted_[train_inds, j])

        # get the indices of the best performing clfs
        select_clf_inds = argmaxn(clf_performance, n=self.n_selected_clfs)
        n_picked = len(select_clf_inds)
        select_clf_weights = clf_performance[select_clf_inds].reshape(
            1, n_picked)

        if predict_proba:
            # one probability slice per selected estimator
            all_proba = np.zeros([1, self._classes, n_picked])
            for k, clf_ind in enumerate(select_clf_inds):
                all_proba[:, :, k] = \
                    self.base_estimators[clf_ind].predict_proba(test_sample)

            if self.use_weights:
                y_predicted[i] = np.mean(all_proba * select_clf_weights,
                                         axis=2)
            else:
                y_predicted[i] = np.mean(all_proba, axis=2)
            continue

        # label prediction: one column per selected estimator
        all_scores = np.zeros([1, n_picked])
        for k, clf_ind in enumerate(select_clf_inds):
            all_scores[:, k] = \
                self.base_estimators[clf_ind].predict(test_sample)

        vote_kwargs = {'n_classes': self._classes}
        if self.use_weights:
            vote_kwargs['weights'] = select_clf_weights
        y_predicted[i] = majority_vote(all_scores, **vote_kwargs)

    if not predict_proba:
        return y_predicted
    return score_to_proba(y_predicted)