def plot_overfit_curve(da_tr, da_te, num_trials=100, feedback_size=0.5, save=True):
    """Plot the overfit curve using splits of `da_tr` for the feedback (D) and
    final (F) rankings, and `da_te` for the meta-test ranking (P)."""
    Ds, Fs, Ps = [], [], []

    # The meta-test ranking P is computed once from all of `da_te`
    P_perfs = da_te.perfs
    Pas = get_average_rank(P_perfs).argsort()
    P = inv_perm(Pas)

    for t in range(num_trials):
        # Use a part of data as feedback and the rest as final
        # Use all data to estimate G
        da_D, da_F = da_tr.train_test_split(
            train_size=feedback_size,
            shuffling=True,
        )

        D_perfs = da_D.perfs
        F_perfs = da_F.perfs

        Das = get_average_rank(D_perfs).argsort()
        Fas = get_average_rank(F_perfs).argsort()

        D = inv_perm(Das)
        F = inv_perm(Fas)

        Ds.append(D)
        Fs.append(F)
        Ps.append(P)

    # Name of the DA matrix
    da_name = da_tr.name[:-11]

    name_expe = 'plot-overfit-curve'

    plot_overfit_curve_DFP(Ds, Fs, Ps, da_name=da_name, name_expe=name_expe)

def plot_overfit_curve_sample_test(da_matrix, num_trials=100, save=True):
    """Plot the overfit curve by repeatedly splitting `da_matrix` into
    feedback (D), final (F) and meta-test (P) parts."""
    Ds, Fs, Ps = [], [], []

    for t in range(num_trials):
        # Use a part of data as feedback and the rest as final
        # Use all data to estimate G
        da_DF, da_P = da_matrix.train_test_split(
            train_size=2 / 3,
            shuffling=True,
        )
        da_D, da_F = da_DF.train_test_split(
            train_size=1 / 2,
            shuffling=True,
        )

        D_perfs = da_D.perfs
        F_perfs = da_F.perfs
        P_perfs = da_P.perfs

        Das = get_average_rank(D_perfs).argsort()
        Fas = get_average_rank(F_perfs).argsort()
        Pas = get_average_rank(P_perfs).argsort()

        D = inv_perm(Das)
        F = inv_perm(Fas)
        P = inv_perm(Pas)

        Ds.append(D)
        Fs.append(F)
        Ps.append(P)

    # Name of the DA matrix
    da_name = da_matrix.name

    name_expe = 'plot-overfit-curve-sample-test'

    plot_overfit_curve_DFP(Ds, Fs, Ps, da_name=da_name, name_expe=name_expe)

def plot_ofc_disjoint_tasks(da_matrix, n_tasks_per_split=1):
    """Plot the overfit curve using disjoint blocks of tasks for the
    feedback (D), final (F) and meta-test (P) rankings."""
    Ds, Fs, Ps = [], [], []

    perfs = da_matrix.perfs
    n_datasets = len(da_matrix.datasets)
    ntps = n_tasks_per_split
    N = 3 * ntps

    for i in range(n_datasets // N):
        # Each block of N consecutive tasks is split into D, F and P parts
        D_perfs = perfs[i * N:i * N + ntps]
        F_perfs = perfs[i * N + ntps:i * N + 2 * ntps]
        P_perfs = perfs[i * N + 2 * ntps:i * N + 3 * ntps]

        Das = get_average_rank(D_perfs).argsort()
        Fas = get_average_rank(F_perfs).argsort()
        Pas = get_average_rank(P_perfs).argsort()

        D = inv_perm(Das)
        F = inv_perm(Fas)
        P = inv_perm(Pas)

        Ds.append(D)
        Fs.append(F)
        Ps.append(P)

    da_name = da_matrix.name
    name_expe = 'ofc-disjoint-tasks'

    plot_overfit_curve_DFP(Ds, Fs, Ps, da_name=da_name, name_expe=name_expe)

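# Usage sketch for the three overfit-curve plots above. Assumption: `da_tr`,
# `da_te` and `da_matrix` are DAMatrix objects built elsewhere in the
# repository (e.g. loaded from a benchmark); the variable names below are
# placeholders, not objects defined here.
#
#     plot_overfit_curve(da_tr, da_te, num_trials=100, feedback_size=0.5)
#     plot_overfit_curve_sample_test(da_matrix, num_trials=100)
#     plot_ofc_disjoint_tasks(da_matrix, n_tasks_per_split=2)
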
def __call__(self, dist_pred, da_te):
    """Compute the expected average rank of `dist_pred`.

    Args:
      dist_pred: int or list of length `A` (number of algorithms), a
        probability distribution on {1, ..., A}.
      da_te: DAMatrix of shape (D, A) for meta-test, where D is the
        number of tasks/datasets.
    """
    perfs = da_te.perfs
    n_algos = perfs.shape[-1]
    avg_rank = get_average_rank(perfs, normalized=True)

    # When the predicted distribution is a single algorithm index
    if not isinstance(dist_pred, Iterable):
        return avg_rank[dist_pred]

    # Otherwise return the expectation of the average rank under `dist_pred`
    res = 0
    for i in range(n_algos):
        res += dist_pred[i] * avg_rank[i]
    return res

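# A minimal, self-contained sketch of the expected-average-rank computation
# above. Assumption: `get_average_rank(perfs, normalized=True)` returns, per
# algorithm, its rank averaged over tasks and rescaled to [0, 1]; the helper
# `_toy_avg_rank` below re-implements this behavior for illustration only and
# is not the repository's own function.
def _toy_avg_rank(perfs):
    import numpy as np
    perfs = np.asarray(perfs)
    # Rank algorithms on each task (0 = best performance) and average the
    # ranks over tasks, then normalize by (number of algorithms - 1).
    ranks = (-perfs).argsort(axis=1).argsort(axis=1)
    return ranks.mean(axis=0) / (perfs.shape[1] - 1)


def _example_expected_avg_rank():
    import numpy as np
    perfs_te = np.array([[0.9, 0.7, 0.5],
                         [0.8, 0.6, 0.9]])  # 2 tasks, 3 algorithms
    avg_rank = _toy_avg_rank(perfs_te)       # [0.25, 0.75, 0.5]
    dist_pred = [0.5, 0.3, 0.2]              # probability over the 3 algorithms
    # Expected average rank = sum_i dist_pred[i] * avg_rank[i] = 0.45
    return sum(p * r for p, r in zip(dist_pred, avg_rank))
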
def get_meta_learner_avg_rank(da_tr, da_te, meta_learner, repeat=10):
    """Meta-fit `meta_learner` on `da_tr` several times and return the mean and
    standard deviation of the meta-test average rank of its chosen algorithm."""
    n_algos = len(da_tr.algos)
    perfs_te = da_te.perfs
    avg_ranks_te = get_average_rank(perfs_te)

    avg_ranks_fit = []
    ks = []
    for i in range(repeat):
        meta_learner.meta_fit(da_tr)
        try:
            print(meta_learner.name, da_tr.name, len(da_tr.algos), meta_learner.k)
            ks.append(meta_learner.k)
        except AttributeError:
            # Not all meta-learners expose a hyperparameter `k`
            print("No info on k.")
        idx = meta_learner.indices_algo_to_reveal[0]
        print("Chosen algorithm: {}".format(str(da_tr.algos[idx])))
        ar = avg_ranks_te[idx]
        avg_ranks_fit.append(ar)

    mean = np.mean(avg_ranks_fit)
    std = np.std(avg_ranks_fit)
    return mean, std, ks
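
# Usage sketch. Assumption: `SomeMetaLearner` stands for any meta-learner in
# the repository exposing `meta_fit` and `indices_algo_to_reveal` (and
# optionally `k` and `name`); the class name is a placeholder.
#
#     meta_learner = SomeMetaLearner()
#     mean, std, ks = get_meta_learner_avg_rank(da_tr, da_te, meta_learner, repeat=10)
#     print("Average test rank: {:.3f} +/- {:.3f}".format(mean, std))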