def make_results_table(models=None, tasks=None, max_npe='5M',
                       name_prefix='bf_rl_paper', extra_desc='',
                       models_dir='/tmp'):
    """Creates a table of results: algorithm + version by tasks.

    Args:
      models: The table columns. A list of (algorithm, desc) tuples. The same
          algorithm may appear several times with different descs; each pair
          gets its own column.
      tasks: The table rows. List of task names.
      max_npe: String SI unit representation of the maximum NPE threshold for
          the experiment. For example, "5M" means 5 million. All entries in
          the table share the same max-NPE.
      name_prefix: Name prefix used in logging directory for the experiment.
      extra_desc: Extra description added to name of logging directory for
          the experiment.
      models_dir: Parent directory containing all experiment folders.

    Returns:
      A 2D list holding the table cells. The first row is a header holding
      the max NPE and one "algorithm (desc)" label per model (each label is
      followed by two empty cells), the second row names the three
      per-model columns, and every following row holds one task.
    """
    if models is None:
        models = DEFAULT_MODELS
    if tasks is None:
        tasks = DEFAULT_TASKS

    # Key results by the full (algorithm, desc) pair. Keying by algorithm
    # alone would let two versions of the same algorithm overwrite each
    # other, so both columns would show the numbers of whichever came last.
    model_results = {}
    for model_type, desc in models:
        model_results[(model_type, desc)] = {
            tname: get_results_for_experiment(
                models_dir, tname, model_type, max_npe, desc,
                name_prefix=name_prefix, extra_desc=extra_desc).processed
            for tname in tasks}

    def info(stats):
        """Formats one stats dict as the three cells of a model column."""
        return [str(stats['repetitions']),
                '%.2f' % stats['success_rate'],
                str(int(stats['avg_total_npe']))]

    # Header row: every model spans three cells, so pad each label with two
    # empty cells to stay aligned with the sub-header below.
    rows = [['max NPE: ' + max_npe]
            + misc.flatten([['{0} ({1})'.format(m, d), '', '']
                            for m, d in models])]
    rows.append(
        [''] + misc.flatten([['reps', 'success rate', 'avg NPE']
                             for _ in models]))
    for tname in tasks:
        rows.append(
            [tname] + misc.flatten([info(model_results[(model, desc)][tname])
                                    for model, desc in models]))
    return rows
def make_results_table(
        models=None,
        tasks=None,
        max_npe='5M',
        name_prefix='bf_rl_paper',
        extra_desc='',
        models_dir='/tmp'):
    """Creates a table of results: algorithm + version by tasks.

    Args:
      models: The table columns. A list of (algorithm, desc) tuples. The same
          algorithm may appear several times with different descs; each pair
          gets its own column.
      tasks: The table rows. List of task names.
      max_npe: String SI unit representation of the maximum NPE threshold for
          the experiment. For example, "5M" means 5 million. All entries in
          the table share the same max-NPE.
      name_prefix: Name prefix used in logging directory for the experiment.
      extra_desc: Extra description added to name of logging directory for
          the experiment.
      models_dir: Parent directory containing all experiment folders.

    Returns:
      A 2D list holding the table cells. The first row is a header holding
      the max NPE and one "algorithm (desc)" label per model (each label is
      followed by two empty cells), the second row names the three
      per-model columns, and every following row holds one task.
    """
    if models is None:
        models = DEFAULT_MODELS
    if tasks is None:
        tasks = DEFAULT_TASKS

    # Results are stored under the (algorithm, desc) pair rather than the
    # algorithm name alone; otherwise two versions of one algorithm collide
    # and every such column reports the last version's numbers.
    model_results = {}
    for model_type, desc in models:
        per_task = {}
        for tname in tasks:
            per_task[tname] = get_results_for_experiment(
                models_dir, tname, model_type, max_npe, desc,
                name_prefix=name_prefix, extra_desc=extra_desc).processed
        model_results[(model_type, desc)] = per_task

    def info(stats):
        """Formats one stats dict as the three cells of a model column."""
        return [
            str(stats['repetitions']),
            '%.2f' % stats['success_rate'],
            str(int(stats['avg_total_npe'])),
        ]

    # Each model occupies three cells per row, so its header label is padded
    # with two empty cells.
    header = ['max NPE: ' + max_npe] + misc.flatten(
        [['{0} ({1})'.format(m, d), '', ''] for m, d in models])
    sub_header = [''] + misc.flatten(
        [['reps', 'success rate', 'avg NPE'] for _ in models])

    rows = [header, sub_header]
    for tname in tasks:
        rows.append(
            [tname] + misc.flatten([info(model_results[(model, desc)][tname])
                                    for model, desc in models]))
    return rows
def __init__(self):
    """Builds the path trie, the target sequence and the distance function."""
    # Every path is a sequence of sub-sequences, and every sub-sequence is a
    # unique triple of arbitrary ints. Sub-sequences are used in place of
    # single entities so that episodes last longer, which makes the task
    # harder: the agent has more to remember.
    seg_a = (1, 2, 3)
    seg_b = (4, 5, 6)
    seg_c = (7, 8, 7)
    seg_d = (6, 5, 4)
    seg_e = (3, 2, 1)
    seg_f = (8, 5, 1)
    seg_g = (6, 4, 2)
    seg_h = (1, 8, 3)

    # All paths stored in the trie, in insertion order.
    known_paths = (
        [seg_a, seg_b, seg_h],
        [seg_a, seg_b, seg_c, seg_d, seg_e, seg_f, seg_g, seg_h],
        [seg_a, seg_b, seg_c, seg_d, seg_e, seg_b, seg_a],
        [seg_a, seg_b, seg_g, seg_h],
        [seg_a, seg_e, seg_f, seg_g],
    )
    self.paths = Trie()
    for path in known_paths:
        self.paths.insert(path)

    # The target is the longest path above, flattened into one sequence.
    self.correct_sequence = misc.flatten(
        [seg_a, seg_b, seg_c, seg_d, seg_e, seg_f, seg_g, seg_h])

    def distance_fn(a, b):
        """Sums per-position distances and penalizes any length mismatch."""
        len_diff = abs(len(a) - len(b))
        # Values are shifted down by one before being compared modulo 8.
        positional = sum(
            reward_lib.mod_abs_diff(left - 1, right - 1, 8)
            for left, right in zip(a, b))
        # Each unmatched position costs 4 (8 / 2 = 4).
        return positional + len_diff * 4

    self.distance_fn = distance_fn
def __init__(self):
    """Builds the path trie, the target sequence and the distance function."""
    # Every path is a sequence of sub-sequences, and every sub-sequence is a
    # unique triple of arbitrary ints. Sub-sequences are used in place of
    # single entities so that episodes last longer, which makes the task
    # harder: the agent has more to remember.
    seg_a = (1, 2, 3)
    seg_b = (4, 5, 6)
    seg_c = (7, 8, 7)
    seg_d = (6, 5, 4)
    seg_e = (3, 2, 1)
    seg_f = (8, 5, 1)
    seg_g = (6, 4, 2)
    seg_h = (1, 8, 3)

    # All paths stored in the trie, in insertion order.
    known_paths = (
        [seg_a, seg_b, seg_h],
        [seg_a, seg_b, seg_c, seg_d, seg_e, seg_f, seg_g, seg_h],
        [seg_a, seg_b, seg_c, seg_d, seg_e, seg_b, seg_a],
        [seg_a, seg_b, seg_g, seg_h],
        [seg_a, seg_e, seg_f, seg_g],
    )
    self.paths = Trie()
    for path in known_paths:
        self.paths.insert(path)

    # The target is the longest path above, flattened into one sequence.
    self.correct_sequence = misc.flatten(
        [seg_a, seg_b, seg_c, seg_d, seg_e, seg_f, seg_g, seg_h])

    def distance_fn(a, b):
        """Sums per-position distances and penalizes any length mismatch."""
        len_diff = abs(len(a) - len(b))
        # Values are shifted down by one before being compared modulo 8.
        positional = sum(
            reward_lib.mod_abs_diff(left - 1, right - 1, 8)
            for left, right in zip(a, b))
        # Each unmatched position costs 4 (8 / 2 = 4).
        return positional + len_diff * 4

    self.distance_fn = distance_fn