def _get_local_region(self, X_test_norm):
    """Get the local region for each test instance.

    Parameters
    ----------
    X_test_norm : numpy array, shape (n_samples, n_features)
        Normalized test data.

    Returns
    -------
    final_local_region_list : list of lists, shape [n_samples, [local_region]]
        Indices of training samples in the local region of each test sample.
    """

    # Initialize the local region list
    local_region_list = [[]] * X_test_norm.shape[0]

    if self.local_max_features > 1.0:
        warnings.warn(
            "Local max features greater than 1.0, reducing to 1.0")
        self.local_max_features = 1.0

    if self.X_train_norm_.shape[1] * self.local_min_features < 1:
        warnings.warn(
            "Local min features smaller than 1, increasing to 1.0")
        self.local_min_features = 1.0

    # perform multiple iterations
    for _ in range(self.local_region_iterations):

        # if min and max are the same, use all features
        if self.local_max_features == self.local_min_features:
            features = range(0, self.X_train_norm_.shape[1])
            warnings.warn("Local min features equals local max features; "
                          "using all features instead.")
        else:
            # randomly generate feature subspaces
            features = generate_bagging_indices(
                self.random_state,
                bootstrap_features=False,
                n_features=self.X_train_norm_.shape[1],
                min_features=int(
                    self.X_train_norm_.shape[1] * self.local_min_features),
                max_features=int(
                    self.X_train_norm_.shape[1] * self.local_max_features))

        # build a KDTree on the training subspace
        tree = KDTree(self.X_train_norm_[:, features])

        # find neighbors of each test instance in that subspace
        _, ind_arr = tree.query(X_test_norm[:, features],
                                k=self.local_region_size)

        # add neighbors to the local region list
        for j in range(X_test_norm.shape[0]):
            local_region_list[j] = local_region_list[j] + \
                                   ind_arr[j, :].tolist()

    # keep neighbors that occur more than local_region_threshold times;
    # if fewer than two survive, relax the threshold until enough remain
    final_local_region_list = [[]] * X_test_norm.shape[0]
    for j in range(X_test_norm.shape[0]):
        tmp = [item for item, count in
               collections.Counter(local_region_list[j]).items()
               if count > self.local_region_threshold]

        decrease_value = 0
        while len(tmp) < 2:
            decrease_value = decrease_value + 1
            assert decrease_value < self.local_region_threshold
            tmp = [item for item, count in
                   collections.Counter(local_region_list[j]).items()
                   if count > (self.local_region_threshold - decrease_value)]

        final_local_region_list[j] = tmp

    return final_local_region_list
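# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the library): the method above builds each
# test point's "local region" by repeatedly querying a KDTree over random
# feature subspaces and keeping only training indices that show up as
# neighbors in more than a threshold number of iterations. The standalone
# snippet below reproduces that idea on synthetic data; the data, parameter
# values, and names here are assumptions for illustration only.
# ---------------------------------------------------------------------------
import collections

import numpy as np
from sklearn.neighbors import KDTree

rng = np.random.RandomState(42)

# hypothetical toy data: 200 training points, 5 test points, 10 features
X_train_toy = rng.rand(200, 10)
X_test_toy = rng.rand(5, 10)

n_iterations = 20   # number of random feature subspaces
region_size = 30    # neighbors fetched per subspace
threshold = 10      # keep indices seen in more than `threshold` subspaces

toy_region_list = [[] for _ in range(X_test_toy.shape[0])]

for _ in range(n_iterations):
    # draw a random subspace of half the features
    features = rng.choice(X_train_toy.shape[1], size=5, replace=False)

    # nearest training neighbors of each test point within that subspace
    tree = KDTree(X_train_toy[:, features])
    _, ind_arr = tree.query(X_test_toy[:, features], k=region_size)

    for j in range(X_test_toy.shape[0]):
        toy_region_list[j] += ind_arr[j, :].tolist()

# a training index enters a test point's local region only if it was a
# neighbor in more than `threshold` of the random subspaces
toy_final_regions = [
    [idx for idx, count in collections.Counter(region).items()
     if count > threshold]
    for region in toy_region_list
]

print([len(region) for region in toy_final_regions])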
# record runtime, ROC, and precision@n for the Toeplitz projection variant
y_train_scores = clf.decision_scores_
toeplitz_time.append(time.time() - start)
toeplitz_roc.append(roc_auc_score(y, y_train_scores))
toeplitz_prn.append(precision_n_scores(y, y_train_scores))

# PCA baseline: project to dim_new components, then fit the detector
X_transformer = PCA_sklearn(n_components=dim_new).fit_transform(X)
start = time.time()
clf.fit(X_transformer)
y_train_scores = clf.decision_scores_
pca_time.append(time.time() - start)
pca_roc.append(roc_auc_score(y, y_train_scores))
pca_prn.append(precision_n_scores(y, y_train_scores))

# random feature subset (rp) baseline: select dim_new original features at random
selected_features = generate_bagging_indices(random_state=j,
                                             bootstrap_features=False,
                                             n_features=int(X.shape[1]),
                                             min_features=dim_new,
                                             max_features=dim_new + 1)
assert dim_new == len(selected_features)
X_transformer = X[:, selected_features]
start = time.time()
clf.fit(X_transformer)
y_train_scores = clf.decision_scores_
rp_time.append(time.time() - start)
rp_roc.append(roc_auc_score(y, y_train_scores))
rp_prn.append(precision_n_scores(y, y_train_scores))

# report averages across trials
print()
print(mat_file_name)
print('original', np.round(np.average(original_time), decimals=4),
      np.round(np.average(original_roc), decimals=4),