def print_results_tables(records, selection_method, latex):
    """Print one accuracy table per dataset, then a cross-dataset averages table.

    Records are grouped with ``get_grouped_records``; every group is annotated
    with ``selection_method.sweep_acc(group['records'])`` and groups whose
    sweep accuracy is ``None`` are discarded before any table is built.

    Args:
        records: Iterable of result records carrying an ``args`` mapping.
        selection_method: Object providing ``sweep_acc(records)`` and ``name``.
        latex: Forwarded unchanged to ``format_mean`` and ``print_table``.
    """
    scored_groups = (
        get_grouped_records(records)
        .map(lambda grp: {
            **grp,
            'sweep_acc': selection_method.sweep_acc(grp['records']),
        })
        .filter(lambda grp: grp['sweep_acc'] is not None)
    )

    # Algorithms: the ones listed in algorithms.ALGORITHMS first (in that
    # order), followed by any other algorithm found in the records.
    found_algs = Q(records).select('args.algorithm').unique()
    listed_algs = [name for name in algorithms.ALGORITHMS
                   if name in found_algs]
    unlisted_algs = [name for name in found_algs
                     if name not in algorithms.ALGORITHMS]
    alg_names = listed_algs + unlisted_algs

    # Datasets: sorted via Q.sorted().
    dataset_names = Q(records).select('args.dataset').unique().sorted()

    # One table per dataset: a row per algorithm, a column per test env.
    for dataset in dataset_names:
        test_envs = range(datasets.NUM_ENVIRONMENTS[dataset])
        table = []
        for algorithm in alg_names:
            row = []
            for test_env in test_envs:
                trial_accs = scored_groups.filter_equals(
                    'dataset, algorithm, test_env',
                    (dataset, algorithm, test_env),
                ).select('sweep_acc')
                row.append(format_mean(trial_accs, latex))
            table.append(row)

        col_labels = ['Algorithm']
        col_labels.extend(datasets.get_dataset_class(dataset).ENVIRONMENT_NAMES)
        header_text = (f'Dataset: {dataset}, '
                       f'model selection method: {selection_method.name}')
        print_table(table, header_text, alg_names, col_labels,
                    colwidth=20, latex=latex)

    # Print an 'averages' table: a row per algorithm, a column per dataset.
    # Sweep accuracies are first averaged within each trial seed.
    table = []
    for algorithm in alg_names:
        row = []
        for dataset in dataset_names:
            trial_averages = (
                scored_groups
                .filter_equals('algorithm, dataset', (algorithm, dataset))
                .group('trial_seed')
                .map(lambda trial_seed, group:
                     group.select('sweep_acc').mean())
            )
            row.append(format_mean(trial_averages, latex))
        table.append(row)

    col_labels = ['Algorithm', *dataset_names]
    header_text = f'Averages, model selection method: {selection_method.name}'
    print_table(table, header_text, alg_names, col_labels,
                colwidth=25, latex=latex)
def get_grouped_records(records):
    """Bucket records by (trial_seed, dataset, algorithm, test_env).

    A record that lists several test envs is added to one bucket per env, so
    the same record may appear in more than one group.

    Returns:
        A ``Q`` of dicts with keys ``trial_seed``, ``dataset``, ``algorithm``,
        ``test_env`` and ``records`` (the bucket's records wrapped in ``Q``).
    """
    buckets = collections.defaultdict(list)
    for record in records:
        for test_env in record['args']['test_envs']:
            key = (
                record['args']['trial_seed'],
                record['args']['dataset'],
                record['args']['algorithm'],
                test_env,
            )
            buckets[key].append(record)

    groups = []
    for (trial_seed, dataset, algorithm, test_env), members in buckets.items():
        groups.append({
            'trial_seed': trial_seed,
            'dataset': dataset,
            'algorithm': algorithm,
            'test_env': test_env,
            'records': Q(members),
        })
    return Q(groups)
def get_grouped_records(records):
    """Group records under the key (trial_seed, dataset, algorithm, test_env).

    Since a single record can name multiple test envs, it is filed once for
    each of them and can therefore belong to several groups.

    Returns:
        A ``Q`` holding one dict per group, with the four key fields plus
        ``records``, a ``Q`` of the records filed under that key.
    """
    by_key = collections.defaultdict(list)
    for rec in records:
        run_args = rec["args"]
        for env in run_args["test_envs"]:
            by_key[(run_args["trial_seed"],
                    run_args["dataset"],
                    run_args["algorithm"],
                    env)].append(rec)

    return Q([
        {
            "trial_seed": seed,
            "dataset": dataset_name,
            "algorithm": alg_name,
            "test_env": env,
            "records": Q(group_records),
        }
        for (seed, dataset_name, alg_name, env), group_records
        in by_key.items()
    ])
def load_records(path):
    """Load every JSON record found in ``<path>/<entry>/results.jsonl``.

    Each entry returned by ``os.listdir(path)`` is treated as a run directory.
    Entries whose ``results.jsonl`` cannot be opened or read (missing file,
    entry is not a directory, ...) are skipped, so partially populated sweep
    directories still load.

    Args:
        path: Directory containing one sub-directory per run.

    Returns:
        A ``Q`` wrapping all decoded records, in directory-listing order and,
        within a file, in line order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    for subdir in tqdm.tqdm(os.listdir(path), ncols=80, leave=False):
        results_path = os.path.join(path, subdir, "results.jsonl")
        try:
            with open(results_path, "r") as f:
                for line in f:
                    # Strip whitespace instead of slicing off the last
                    # character: a final line without a trailing newline
                    # would otherwise lose its closing brace and fail to
                    # parse. Blank lines carry no record and are skipped.
                    line = line.strip()
                    if line:
                        records.append(json.loads(line))
        except IOError:
            # Deliberate best-effort: runs without readable results are
            # simply left out of the returned records.
            continue
    return Q(records)
# Fragment of a training script's checkpoint/finish sequence. The enclosing
# function or loop is not visible here and the original nesting was lost when
# the whitespace was collapsed, so the statements are kept verbatim.
# What the statements do, in order:
#   * print the current results row, then add 'hparams' and 'args' to `results`
#     and append it as one sorted-key JSON line to <args.output_dir>/results.jsonl;
#   * snapshot algorithm.state_dict(), advance start_step, reset checkpoint_vals;
#   * re-read results.jsonl into a Q, map IIDAccuracySelectionMethod._step_acc
#     over it, and save 'IID_best.pkl' when the last score equals
#     scores.argmax('val_acc');
#   * save 'model_step{step}.pkl' if args.save_model_every_checkpoint is set,
#     save 'model.pkl', and write a 'done' marker file into args.output_dir.
# NOTE(review): presumably everything up to the per-step checkpoint runs inside
# the training loop and the final 'model.pkl' / 'done' writes run after it -
# confirm against the original file before re-indenting.
# NOTE(review): `line[:-1]` assumes every line ends with a newline; that holds
# for lines appended by the write above, which always adds "\n".
last_results_keys = results_keys misc.print_row([results[key] for key in results_keys], colwidth=12) results.update({'hparams': hparams, 'args': vars(args)}) epochs_path = os.path.join(args.output_dir, 'results.jsonl') with open(epochs_path, 'a') as f: f.write(json.dumps(results, sort_keys=True) + "\n") algorithm_dict = algorithm.state_dict() start_step = step + 1 checkpoint_vals = collections.defaultdict(lambda: []) records = [] with open(epochs_path, 'r') as f: for line in f: records.append(json.loads(line[:-1])) records = Q(records) scores = records.map( model_selection.IIDAccuracySelectionMethod._step_acc) if scores[-1] == scores.argmax('val_acc'): save_checkpoint('IID_best.pkl') algorithm.to(device) if args.save_model_every_checkpoint: save_checkpoint(f'model_step{step}.pkl') save_checkpoint('model.pkl') with open(os.path.join(args.output_dir, 'done'), 'w') as f: f.write('done')
def todo_rename(records, selection_method, latex):
    """Print per-dataset accuracy tables and an averages table, each with "Avg".

    Records are grouped with ``reporting.get_grouped_records``; each group is
    annotated with ``selection_method.sweep_acc(group["records"])`` and groups
    whose sweep accuracy is ``None`` are dropped. Every table row ends with an
    "Avg" cell: the mean of the row's means formatted with one decimal, or
    ``"X"`` when any of those means is ``None``.

    Args:
        records: Iterable of result records carrying an ``args`` mapping.
        selection_method: Object providing ``sweep_acc(records)`` and ``name``.
        latex: When truthy, a ``\\subsubsection`` heading is printed before
            each table; the flag is also forwarded to ``format_mean`` and
            ``print_table``.
    """
    scored_groups = (
        reporting.get_grouped_records(records)
        .map(lambda grp: {
            **grp,
            "sweep_acc": selection_method.sweep_acc(grp["records"]),
        })
        .filter(lambda grp: grp["sweep_acc"] is not None)
    )

    # Algorithms: entries of algorithms.ALGORITHMS first (in that order),
    # then any other algorithm present in the records.
    found_algs = Q(records).select("args.algorithm").unique()
    listed_algs = [name for name in algorithms.ALGORITHMS
                   if name in found_algs]
    unlisted_algs = [name for name in found_algs
                     if name not in algorithms.ALGORITHMS]
    alg_names = listed_algs + unlisted_algs

    # Datasets: only those listed in datasets.DATASETS, in that order.
    found_datasets = Q(records).select("args.dataset").unique().sorted()
    dataset_names = [name for name in datasets.DATASETS
                     if name in found_datasets]

    # One table per dataset: a column per test env plus a trailing "Avg".
    for dataset in dataset_names:
        if latex:
            print()
            print("\\subsubsection{{{}}}".format(dataset))

        test_envs = range(datasets.num_environments(dataset))
        table = []
        for algorithm in alg_names:
            cells = []
            means = []
            for test_env in test_envs:
                trial_accs = scored_groups.filter_equals(
                    "dataset, algorithm, test_env",
                    (dataset, algorithm, test_env),
                ).select("sweep_acc")
                mean, _err, cell = format_mean(trial_accs, latex)
                cells.append(cell)
                means.append(mean)
            avg_cell = ("X" if None in means
                        else "{:.1f}".format(sum(means) / len(means)))
            table.append([*cells, avg_cell])

        col_labels = [
            "Algorithm",
            *datasets.get_dataset_class(dataset).ENVIRONMENTS,
            "Avg",
        ]
        header_text = (f"Dataset: {dataset}, "
                       f"model selection method: {selection_method.name}")
        print_table(table, header_text, alg_names, col_labels,
                    colwidth=20, latex=latex)

    # Print an "averages" table: a column per dataset plus a trailing "Avg".
    # Sweep accuracies are first averaged within each trial seed.
    if latex:
        print()
        print("\\subsubsection{Averages}")

    table = []
    for algorithm in alg_names:
        cells = []
        means = []
        for dataset in dataset_names:
            trial_averages = (
                scored_groups
                .filter_equals("algorithm, dataset", (algorithm, dataset))
                .group("trial_seed")
                .map(lambda trial_seed, group:
                     group.select("sweep_acc").mean())
            )
            mean, _err, cell = format_mean(trial_averages, latex)
            cells.append(cell)
            means.append(mean)
        avg_cell = ("X" if None in means
                    else "{:.1f}".format(sum(means) / len(means)))
        table.append([*cells, avg_cell])

    col_labels = ["Algorithm", *dataset_names, "Avg"]
    header_text = f"Averages, model selection method: {selection_method.name}"
    print_table(table, header_text, alg_names, col_labels,
                colwidth=25, latex=latex)
def print_results_tables(records, selection_method, latex):
    """Given all records, print a results table for each dataset.

    Records are grouped with ``reporting.get_grouped_records`` and each group
    is annotated with ``selection_method.sweep_accs(group["records"])``. For
    every (dataset, algorithm, test_env) cell the first entry of the first
    matching group's ``sweep_accs`` supplies ``test_acc`` and
    ``test_acc_std``. Cells whose lookup fails are reported as NaN instead of
    aborting the report. A final table shows per-dataset averages.

    Args:
        records: Iterable of result records carrying an ``args`` mapping.
        selection_method: Object providing ``sweep_accs(records)`` and
            ``name``.
        latex: When truthy, a ``\\subsubsection`` heading is printed before
            each table; the flag is also forwarded to ``format_mean`` and
            ``print_table``.
    """
    grouped_records = reporting.get_grouped_records(records).map(
        lambda group: {
            **group,
            "sweep_accs": selection_method.sweep_accs(group["records"]),
        }
    )

    # read algorithm names and sort (predefined order, unknown ones last)
    alg_names = Q(records).select("args.algorithm").unique()
    alg_names = ([n for n in algorithms.ALGORITHMS if n in alg_names] +
                 [n for n in alg_names if n not in algorithms.ALGORITHMS])

    # read dataset names; keep only those listed in datasets.DATASETS, in
    # that predefined order
    dataset_names = Q(records).select("args.dataset").unique().sorted()
    dataset_names = [d for d in datasets.DATASETS if d in dataset_names]

    for dataset in dataset_names:
        if latex:
            print()
            print("\\subsubsection{{{}}}".format(dataset))
        test_envs = range(datasets.num_environments(dataset))
        table = [[None for _ in [*test_envs, "Avg"]] for _ in alg_names]
        for i, algorithm in enumerate(alg_names):
            means = []
            stdevs = []
            for j, test_env in enumerate(test_envs):
                try:
                    acc = grouped_records.filter_equals(
                        "dataset, algorithm, test_env",
                        (dataset, algorithm, test_env)
                    )[0]['sweep_accs'][0]
                    mean = acc['test_acc']
                    stdev = acc['test_acc_std']
                except Exception:
                    # Missing or incomplete results for this cell: show NaN.
                    # `Exception` rather than a bare `except` so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    mean = float('nan')
                    stdev = float('nan')
                means.append(mean)
                stdevs.append(stdev)
                _, _, table[i][j] = format_mean(mean, stdev, latex)

            # "Avg" column: mean of the per-env means, with the per-env
            # standard deviations combined in quadrature and divided by the
            # number of envs.
            avg_mean = np.mean(means)
            avg_stdev = np.sqrt(np.sum(np.array(stdevs)**2)) / len(stdevs)
            _, _, table[i][-1] = format_mean(avg_mean, avg_stdev, latex)

        col_labels = [
            "Algorithm",
            *datasets.get_dataset_class(dataset).ENVIRONMENTS,
            "Avg"
        ]
        header_text = (f"Dataset: {dataset}, "
                       f"model selection method: {selection_method.name}")
        print_table(table, header_text, alg_names, list(col_labels),
                    colwidth=20, latex=latex)

    # Print an "averages" table
    if latex:
        print()
        print("\\subsubsection{Averages}")

    table = [[None for _ in [*dataset_names, "Avg"]] for _ in alg_names]
    for i, algorithm in enumerate(alg_names):
        means = []
        for j, dataset in enumerate(dataset_names):
            try:
                mean = (grouped_records
                        .filter_equals("algorithm, dataset",
                                       (algorithm, dataset))
                        .select(lambda x: x['sweep_accs'][0]['test_acc'])
                        .mean())
            except Exception:
                # No usable results for this (algorithm, dataset) pair.
                # `Exception` keeps the NaN fallback without swallowing
                # KeyboardInterrupt / SystemExit.
                mean = float('nan')
            # NOTE(review): presumably test_acc is a fraction and this scales
            # it to percent - confirm against format_mean.
            mean *= 100.
            table[i][j] = "{:.1f}".format(mean)
            means.append(mean)
        table[i][-1] = "{:.1f}".format(sum(means) / len(means))

    col_labels = ["Algorithm", *dataset_names, "Avg"]
    header_text = f"Averages, model selection method: {selection_method.name}"
    print_table(table, header_text, alg_names, col_labels,
                colwidth=25, latex=latex)