Example #1
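Both excerpts assume the enclosing module imports time, multiprocessing as mp, and numpy as np, and provides the project-local helpers linalg, convex_opt, and the Model class; these are inferred from usage and are not shown in the listing itself.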
    def _fit(self, x_train, y_train, rank=None, runtime_limit=None):
        """This is a single round of the doubling process. It fits an AutoLearner object on a new dataset. This will sample the performance of several algorithms on the new dataset, predict performance on the rest, then construct an optimal ensemble model.

        Args:
            x_train (np.ndarray):  Features of the training dataset.
            y_train (np.ndarray):  Labels of the training dataset.
            rank (int):            Rank of error matrix factorization.
            runtime_limit (float): Maximum time to allocate to AutoLearner fitting.
        """
        if self.verbose:
            print("\nSingle round runtime target: {}".format(runtime_limit))

        # set to defaults if not provided
        rank = rank or linalg.approx_rank(self.error_matrix, threshold=0.01)
        runtime_limit = runtime_limit or self.runtime_limit

        if self.verbose:
            print('Fitting AutoLearner with max. runtime {}s'.format(
                runtime_limit))
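        # predicted per-algorithm runtimes on a dataset of this shape,
        # estimated from the offline runtime matrix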
        t_predicted = convex_opt.predict_runtime(
            x_train.shape, runtime_matrix=self.runtime_matrix)

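        # Column selection: decide which algorithms to actually run on the new
        # dataset. 'qr' uses QR pivoting on the error matrix, 'min_variance'
        # solves an experiment-design problem under the runtime budget, and
        # 'random' draws uniformly from algorithms predicted to finish in time.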
        if self.selection_method == 'qr':
            to_sample = linalg.pivot_columns(self.error_matrix)
        elif self.selection_method == 'min_variance':
            # sample only from the subset of algorithms predicted to finish within the allocated time
            valid = np.where(
                t_predicted <= self.n_cores * runtime_limit / 2)[0]
            Y = self.Y[:rank, valid]
            # TODO: check if Y is rank-deficient, i.e. will ED problem fail?
            v_opt = convex_opt.solve(t_predicted[valid], runtime_limit / 4,
                                     self.n_cores, Y, self.scalarization)
            to_sample = valid[np.where(v_opt > 0.9)[0]]
            # fall back to the `rank` fastest algorithms if the solver produced NaNs
            # (the original checked np.isnan(to_sample), but to_sample holds integer
            # indices and can never contain NaN, so that guard could never fire)
            if np.isnan(v_opt).any():
                to_sample = np.argsort(t_predicted)[:rank]

        elif self.selection_method == 'random':
            to_sample = []
            # set of algorithms that are predicted to finish within given budget
            to_sample_candidates = np.where(
                t_predicted <= runtime_limit / 2)[0]
            # remove algorithms that have been sampled already
            to_sample_candidates = list(
                set(to_sample_candidates) - self.sampled_indices)
            # if no unsampled candidates remain, fall back to the single fastest algorithm
            if len(to_sample_candidates) == 0:
                to_sample = np.array([np.argmin(t_predicted)])
            else:
                to_sample = np.random.choice(to_sample_candidates,
                                             min(self.n_cores,
                                                 len(to_sample_candidates)),
                                             replace=False)
        else:
            to_sample = np.arange(0, self.new_row.shape[1])

        if len(to_sample) == 0 and len(self.sampled_indices) == 0:
            # if no columns were selected in the first iteration (log-det instability), sample the n fastest columns
            n = len(
                np.where(
                    np.cumsum(np.sort(t_predicted)) <= runtime_limit / 4)[0])
            if n > 0:
                to_sample = np.argsort(t_predicted)[:n]
            else:
                self.ensemble.fitted = False
                return

        start = time.time()
        if self.selection_method != 'random':
            # only need to compute column entry if it has not been computed already
            to_sample = list(set(to_sample) - self.sampled_indices)
            if self.verbose:
                print('Sampling {} entries of new row...'.format(
                    len(to_sample)))

            p1 = mp.Pool(self.n_cores)
            sample_models = [
                Model(self.p_type, self.column_headings[i]['algorithm'],
                      self.column_headings[i]['hyperparameters'], self.verbose,
                      i) for i in to_sample
            ]
            sample_model_errors = [
                p1.apply_async(Model.kfold_fit_validate,
                               args=[m, x_train, y_train, 5])
                for m in sample_models
            ]
            p1.close()
            p1.join()

            # update sampled indices
            self.sampled_indices = self.sampled_indices.union(set(to_sample))
            for i, error in enumerate(sample_model_errors):
                cv_error, cv_predictions = error.get()
                sample_models[i].cv_error = cv_error.mean()
                sample_models[i].cv_predictions = cv_predictions
                sample_models[i].sampled = True
                self.new_row[:, to_sample[i]] = cv_error.mean()
                self.sampled_models[to_sample[i]] = sample_models[i]
            imputed = linalg.impute(self.error_matrix,
                                    self.new_row,
                                    list(self.sampled_indices),
                                    rank=rank)

            # impute ALL entries
            # unknown = sorted(list(set(range(self.new_row.shape[1])) - self.sampled_indices))
            # self.new_row[:, unknown] = imputed[:, unknown]
            self.new_row = imputed.copy()

            # k-fold fit candidate learners of ensemble
            remaining = (runtime_limit - (time.time() - start)) * self.n_cores
            # add best sampled model to list of candidate learners to avoid empty lists
            sampled = list(self.sampled_indices)
            best_sampled_idx = sampled[int(np.argmin(self.new_row[:, sampled]))]
            assert self.sampled_models[best_sampled_idx] is not None
            candidate_indices = [best_sampled_idx]
            self.ensemble.candidate_learners.append(
                self.sampled_models[best_sampled_idx])
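            # greedily add models in increasing order of imputed error while the
            # sum of predicted runtimes fits the remaining budget; the best
            # already-sampled model stays last in the list as a fallback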
            for i in np.argsort(self.new_row[0]):
                if t_predicted[i] + t_predicted[candidate_indices].sum(
                ) <= remaining:
                    last = candidate_indices.pop()
                    assert last == best_sampled_idx
                    candidate_indices.append(i)
                    candidate_indices.append(last)
                    # if model has already been k-fold fitted, immediately add to candidate learners
                    if i in self.sampled_indices:
                        assert self.sampled_models[i] is not None
                        self.ensemble.candidate_learners.append(
                            self.sampled_models[i])
            # candidate learners that need to be k-fold fitted
            to_fit = list(set(candidate_indices) - self.sampled_indices)
        else:
            remaining = (runtime_limit - (time.time() - start)) * self.n_cores
            to_fit = to_sample.copy()

        p2 = mp.Pool(self.n_cores)
        candidate_models = [
            Model(self.p_type, self.column_headings[i]['algorithm'],
                  self.column_headings[i]['hyperparameters'], self.verbose, i)
            for i in to_fit
        ]
        candidate_model_errors = [
            p2.apply_async(Model.kfold_fit_validate,
                           args=[m, x_train, y_train, 5])
            for m in candidate_models
        ]
        p2.close()
        p2.join()

        # update sampled indices
        self.sampled_indices = self.sampled_indices.union(set(to_fit))
        for i, error in enumerate(candidate_model_errors):
            cv_error, cv_predictions = error.get()
            candidate_models[i].cv_error = cv_error.mean()
            candidate_models[i].cv_predictions = cv_predictions
            candidate_models[i].sampled = True
            self.new_row[:, to_fit[i]] = cv_error.mean()
            self.sampled_models[to_fit[i]] = candidate_models[i]
            self.ensemble.candidate_learners.append(candidate_models[i])
        # self.new_row = linalg.impute(self.error_matrix, self.new_row, list(self.sampled_indices), rank=rank)

        if self.verbose:
            print('\nFitting ensemble of max. size {}...'.format(
                len(self.ensemble.candidate_learners)))
        # ensemble selection and fitting in the remaining time budget
        self.ensemble.fit(x_train, y_train, remaining, self.fitted_models)
        for model in self.ensemble.base_learners:
            assert model.index is not None
            self.fitted_indices.add(model.index)
            self.fitted_models[model.index] = model
        self.ensemble.fitted = True

        if self.verbose:
            print('\nAutoLearner fitting complete.')
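
The linalg.impute call above fills in the unsampled entries of the new row from the low-rank structure of the offline error matrix. Below is a minimal sketch of that idea, assuming a truncated-SVD factorization and a least-squares fit of the new row's latent coefficients on the observed columns; all names and the function body are illustrative, not the project's actual implementation.

import numpy as np

def impute_new_row(error_matrix, new_row, observed, rank):
    """Estimate the unobserved entries of new_row from rank-`rank` structure."""
    # Factor the offline error matrix: E ~= U diag(s) Vt.
    _, _, Vt = np.linalg.svd(error_matrix, full_matrices=False)
    V = Vt[:rank]                       # (rank, n_algorithms) latent factors
    # Fit the new dataset's latent coefficients on the observed columns only:
    # minimize ||coef @ V[:, observed] - new_row[observed]||.
    coef, *_ = np.linalg.lstsq(V[:, observed].T, new_row[observed], rcond=None)
    return coef @ V                     # predicted error for every algorithm

# Example: 20 offline datasets x 8 algorithms, 3 entries of the new row observed.
rng = np.random.default_rng(0)
E = rng.random((20, 8))
row = np.zeros(8)
observed = [0, 2, 5]
row[observed] = rng.random(3)
full_row = impute_new_row(E, row, observed, rank=3)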
Example #2
    def fit(self, x_train, y_train):
        """Fit an AutoLearner object on a new dataset. This will sample the performance of several algorithms on the
        new dataset, predict performance on the rest, then perform Bayesian optimization and construct an optimal
        ensemble model.

        Args:
            x_train (np.ndarray): Features of the training dataset.
            y_train (np.ndarray): Labels of the training dataset.
        """
        print('Data={}'.format(x_train.shape))
        self.new_row = np.zeros((1, self.error_matrix.shape[1]))
        known_indices = linalg.pivot_columns(self.error_matrix)

        print('Sampling {} entries of new row...'.format(len(known_indices)))
        pool1 = mp.Pool(self.n_cores)
        sample_models = [
            Model(self.p_type,
                  self.column_headings[i]['algorithm'],
                  self.column_headings[i]['hyperparameters'],
                  verbose=self.verbose) for i in known_indices
        ]
        sample_model_errors = [
            pool1.apply_async(Model.kfold_fit_validate,
                              args=[m, x_train, y_train, 5])
            for m in sample_models
        ]
        pool1.close()
        pool1.join()
        for i, error in enumerate(sample_model_errors):
            self.new_row[:, known_indices[i]] = error.get()[0].mean()
            # TODO: add predictions to second layer matrix?
        self.new_row = linalg.impute(self.error_matrix, self.new_row,
                                     known_indices)

        # Add new row to error matrix at the end (might be incorrect?)
        # self.error_matrix = np.vstack((self.error_matrix, self.new_row))

        # TODO: Fit ensemble candidates (?)

        if self.verbose:
            print('\nConducting Bayesian optimization...')
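        # run Bayesian optimization on the n_models algorithms with the lowest
        # (imputed) predicted error in the new row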
        n_models = 3
        pool2 = mp.Pool(self.n_cores)
        bayesian_opt_models = [
            Model(self.p_type,
                  self.column_headings[i]['algorithm'],
                  self.column_headings[i]['hyperparameters'],
                  verbose=self.verbose)
            for i in np.argsort(self.new_row.flatten())[:n_models]
        ]
        optimized_hyperparams = pool2.map(Model.bayesian_optimize,
                                          bayesian_opt_models)
        pool2.close()
        pool2.join()
        for i, params in enumerate(optimized_hyperparams):
            bayesian_opt_models[i].hyperparameters = params
            self.ensemble.add_base_learner(bayesian_opt_models[i])
            self.optimized_settings.append({
                'algorithm': bayesian_opt_models[i].algorithm,
                'hyperparameters': bayesian_opt_models[i].hyperparameters,
            })

        if self.verbose:
            print('\nFitting optimized ensemble...')
        self.ensemble.fit(x_train, y_train)
        self.ensemble.fitted = True

        if self.verbose:
            print('\nAutoLearner fitting complete.')
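
Both examples pick the initial columns to sample with linalg.pivot_columns. A plausible sketch of that helper follows, assuming it is backed by SciPy's QR with column pivoting, which orders columns so that each pivot column is the one least well explained by those chosen before it; the body here is an assumption, not the project's code.

import numpy as np
from scipy.linalg import qr

def pivot_columns(matrix, rank=None):
    """Return the indices of the `rank` most linearly independent columns."""
    # qr(..., pivoting=True) returns (Q, R, P); P lists columns in pivot order,
    # i.e. roughly by how much new information each column adds.
    _, _, pivots = qr(matrix, pivoting=True)
    rank = rank or np.linalg.matrix_rank(matrix)
    return pivots[:rank]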