def fit(self, x, y):
    """Evolve a population of derived features and keep the best model.

    The columns of ``x`` form the initial population (named ``x_0`` ...
    ``x_{p-1}``). Each generation:

    1. proposes new features with ``mutate`` until the population holds
       ``pop_size = p * (mu + 1 + q)`` entries (at most ``3 * pop_size``
       attempts),
    2. recomputes feature importances from the models returned by
       ``net(Lasso, ...)`` on the enlarged population,
    3. discards the ``mu * p`` least important non-linear features, and
    4. refits via ``_fit_model`` and remembers the best score so far.

    Evolution ends after ``max_iter`` generations, or earlier once
    ``max_stall_iter`` generations have passed without an improvement.

    Parameters
    ----------
    x : array indexed as ``x[:, i]``, with ``x.shape == (n_samples, p)``.
    y : target, forwarded unchanged to ``net``, ``score`` and ``_fit_model``.

    Returns
    -------
    self, with the best model found stored in ``self.model``.

    Warns
    -----
    ConvergenceWarning
        If a generation cannot fill the population within its attempt budget.
    """
    _, p = x.shape
    linear_names = ["x_{}".format(i) for i in range(p)]
    linear_set = set(linear_names)
    names = linear_names[:]
    data = [x[:, i] for i in range(p)]

    models = net(Lasso, x, y, max_coarsity=self.max_coarsity).values()
    scores = [model.score(x, y) for model in models]
    coefs = [model.coef_ for model in models]
    importance = get_importance(coefs, scores)

    stall_iter = 0
    best_model, best_score = _fit_model(x, y, linear_names[:], self.operators, n_jobs=self.n_jobs)
    pop_size = p * (self.mu + 1 + self.q)

    for _ in range(self.max_iter):
        stall_iter += 1
        new_names = []
        new_data = []

        for _attempt in range(3 * pop_size):
            f, new_name, parents = mutate(names, importance, self.toursize, self.operators, self.rng)

            # Reject oversized expressions and duplicates before evaluating.
            if size(new_name) > self.max_size or new_name in new_names or new_name in names:
                continue

            # Candidate functions may overflow or divide by zero; the
            # finiteness check below filters those results out.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                feature = f(*[data[j] for j in parents])

            if not np.all(np.isfinite(feature)):
                continue
            # Reject features too strongly correlated with any of their parents.
            if not all(abs(np.corrcoef(feature, data[j]))[1, 0] <= self.t for j in parents):
                continue

            new_names.append(new_name)
            new_data.append(feature)

            # Stop once the population is full. The comparison used to be
            # `< pop_size`, which broke out after the very first accepted
            # candidate and made the for/else warning below unreachable in
            # practice.
            if len(names) + len(new_names) >= pop_size:
                break
        else:
            warnings.warn(
                "Failed to produce a new population given the tree-depth {} and correlation threshold {}."
                .format(self.max_size, self.t), ConvergenceWarning)

        names.extend(new_names)
        data.extend(new_data)

        # Build the design matrix once; it is needed for fitting and scoring.
        features = np.array(data).T
        models = net(Lasso, features, y, max_coarsity=self.max_coarsity).values()
        scores = [model.score(features, y) for model in models]
        coefs = [model.coef_ for model in models]
        importance = list(get_importance(coefs, scores))

        # Names are unique, so a dict lookup matches importance[names.index(n)].
        rank = dict(zip(names, importance))
        engineered = [
            n for n in sorted(names, key=lambda name: rank[name], reverse=True)
            if n not in linear_set
        ]
        # Tail of the descending ranking = least important engineered features.
        # NOTE(review): if mu * p == 0 the slice [-0:] selects *all* of them.
        discard = set(engineered[-self.mu * p:])

        keep = [k for k, n in enumerate(names) if n not in discard]
        names = [names[k] for k in keep]
        data = [data[k] for k in keep]
        importance = [importance[k] for k in keep]

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model, score = _fit_model(x, y, names, self.operators, n_jobs=self.n_jobs)

        if score > best_score:
            best_model = model
            best_score = score
            stall_iter = 0
        elif stall_iter >= self.max_stall_iter:
            break

    self.model = best_model
    return self
# Build the feature matrix: exponent 1 only and no extra operators.
exponents = [1]
operators = {}
sym = sf.SymbolicFeatures(exponents=exponents, operators=operators)
features = sym.fit_transform(x)

# Estimators to compare, the attribute handed to `net` for each of them,
# and the legend label to plot them under.
ests, attrs, names = [Lasso, STRidge], ["alpha", "threshold"], ["Lasso", "STRidge"]

for est, attr, name in zip(ests, attrs, names):
    models = net(
        est,
        features,
        y,
        attr,
        filter=True,
        max_coarsity=5,
        r_max=1e5,
    )
    # Sorted keys of `models` go on the x-axis, one score per key.
    m = sorted(models)
    scores = np.array([models[k].score(features, y) for k in m])
    plt.plot(m, scores, 'o--', label=name)

plt.legend()
plt.xlabel("# coefficient")
plt.ylabel(r"$R^2$")
# Flip the x-axis so that larger key values appear on the left.
axes = plt.gca()
axes.invert_xaxis()
plt.show()