def dist_keys(self):
    """Return the sorted names of the parameters that were searched over.

    The stored ``distributions`` metadata object is either a list of
    mappings (one per explicitly listed configuration) or a single mapping
    that is wrapped in ``Config``.  ``'idx'`` is always included in the
    result.
    """
    loaded = self.objects.load_object('metadata', 'distributions')

    if isinstance(loaded, list):
        # Union of the keys of every listed configuration.
        names = {name for entry in loaded for name in entry.keys()}
    else:
        names = set(Config(loaded).keys())

    names.add('idx')
    return sorted(names)
def _print_config_cmd(path):
    """Print the base config and the parameter distribution of a search.

    Both objects are read from the ``metadata`` store of the ``HyperSearch``
    located at ``path``.
    """
    search = HyperSearch(path)

    print("BASE CONFIG")
    base_config = search.objects.load_object('metadata', 'config')
    print(base_config)

    distribution = Config(search.objects.load_object('metadata', 'dist'))

    banner = '\n' + '*' * 100
    print(banner)
    print("PARAMETER DISTRIBUTION")
    pprint(distribution)
def test_basic():
    """Check attribute/item access on ``Config`` and its key validation."""
    conf = Config()

    # Values set as attributes are readable both ways.
    conf.a = 1
    assert conf.a == 1
    assert conf['a'] == 1

    # Values set as items are readable both ways.
    conf['b'] = 2
    assert conf.b == 2
    assert conf['b'] == 2

    # Non-string keys and numeric-looking string keys are rejected on write.
    with pytest.raises(AssertionError):
        conf[1] = 2
    with pytest.raises(AssertionError):
        conf['1'] = 2

    # Missing entries raise the lookup error matching the access style.
    with pytest.raises(KeyError):
        conf['1']
    with pytest.raises(AttributeError):
        conf.c
def nested_sample(param_dist, n_samples=1):
    """ Draw ``n_samples`` configurations from a parameter distribution.

    The distribution is a (possibly nested) dictionary.  After flattening,
    each entry is handled as follows:

    * if ``np.random.choice`` can draw from ``list(value)``, ``n_samples``
      values are drawn from it;
    * otherwise, if the value has an ``rvs`` method, ``value.rvs(n_samples)``
      is used;
    * otherwise the value is copied unchanged into every configuration.

    Returns a list of objects of the same type as ``param_dist``, ordered by
    the sorted tuples of sampled values.

    """
    assert isinstance(param_dist, dict)

    flat = Config(param_dist).flatten()

    fixed = {}
    columns = []
    sampled_keys = []

    for name in sorted(flat.keys()):
        spec = flat[name]
        try:
            drawn = list(np.random.choice(list(spec), size=n_samples))
        except (TypeError, ValueError):
            if not hasattr(spec, 'rvs'):
                # Neither a collection of choices nor a distribution.
                fixed[name] = spec
                continue
            drawn = spec.rvs(n_samples)
        columns.append(drawn)
        sampled_keys.append(name)

    # Transpose: one row per sampled configuration.
    rows = sorted(zip(*columns))

    configs = []
    for row in rows:
        built = Config(deepcopy(fixed))
        for name, value in zip(sampled_keys, row):
            built[name] = value
        configs.append(type(param_dist)(built))
    return configs
def generate_all(param_dist):
    """ Enumerate every configuration described by a parameter distribution.

    The distribution is a (possibly nested) dictionary.  After flattening,
    every value that can be turned into a list contributes one axis to a
    cartesian product; every other value is copied unchanged into each
    configuration.  A non-listable value exposing ``rvs`` is rejected, as it
    cannot be enumerated.

    Returns a list of objects of the same type as ``param_dist``, ordered by
    the sorted tuples of enumerated values.

    """
    assert isinstance(param_dist, dict)

    flat = Config(param_dist).flatten()

    fixed = {}
    swept_keys = []
    value_lists = []

    for name in sorted(flat.keys()):
        spec = flat[name]
        try:
            options = list(spec)
        except (TypeError, ValueError):
            if hasattr(spec, 'rvs'):
                raise Exception(
                    "Attempting to generate all samples, but element {} "
                    "with key {} is a continuous distribution.".format(spec, name))
            fixed[name] = spec
        else:
            value_lists.append(options)
            swept_keys.append(name)

    configs = []
    for combo in sorted(product(*value_lists)):
        built = Config(deepcopy(fixed))
        for name, value in zip(swept_keys, combo):
            built[name] = value
        configs.append(type(param_dist)(built))
    return configs
def sample_configs(distributions, n_repeats, n_samples=None):
    """ Samples configs from a distribution for hyper-parameter search.

    Parameters
    ----------
    distributions: dict, list or None
        * ``None``: a single empty ``Config`` is used.
        * list: an explicit list of configurations. If ``n_samples`` is
          truthy, a random permutation of the list is truncated to
          ``n_samples`` entries. The listed objects are never modified.
        * dict: mapping from parameter names to distributions. If
          ``n_samples`` is falsy every combination is enumerated with
          ``generate_all``, otherwise ``nested_sample`` draws ``n_samples``
          configurations.
    n_repeats: int > 0
        Number of different seeds to use for each sampled configuration.
    n_samples: int > 0
        Number of configs to sample.

    Returns
    -------
    list
        One entry per (sample, repeat) pair; each entry is a deep copy of the
        sample with ``idx``, ``repeat`` and ``seed`` added.

    """
    if distributions is None:
        samples = [Config()]
    elif isinstance(distributions, list):
        samples = list(distributions)
        if n_samples:
            samples = list(np.random.permutation(samples)[:n_samples])
    elif not n_samples:
        samples = generate_all(distributions)
    else:
        samples = nested_sample(distributions, n_samples)

    print("Sampled configs:")
    pprint(samples)

    configs = []
    for i, s in enumerate(samples):
        # Tag a private copy: in the list case `s` is the caller's own object,
        # and writing `idx` into it would silently alter the caller's data.
        base = copy.deepcopy(s)
        base['idx'] = i

        for r in range(n_repeats):
            _new = copy.deepcopy(base)
            _new['repeat'] = r
            _new['seed'] = gen_seed()
            configs.append(_new)

    return configs
def __call__(self, updater):
    """Render an animation comparing inputs, outputs and error maps.

    Fetches ``inp`` and ``output`` from ``updater``, builds the absolute
    difference and cross-entropy maps for both the model output and a
    per-sequence mean image (mean over axis 1, tiled back along that axis),
    and passes everything to ``animate``.
    """
    self.fetches = "inp output"
    data = Config(self._fetch(updater))
    inp = data['inp']
    output = data['output']

    n_steps = inp.shape[1]
    mean_image = np.tile(inp.mean(axis=1, keepdims=True), (1, n_steps, 1, 1, 1))

    n_batch = inp.shape[0]
    unit = 3
    figsize = (7 * unit, n_batch * unit)

    def collapse_last(values):
        # Sum over the last axis, keeping it as a singleton dimension.
        return values.sum(axis=-1, keepdims=True)

    diff = self.normalize_images(collapse_last(np.abs(inp - output)))
    xent = self.normalize_images(
        collapse_last(xent_loss(pred=output, label=inp, tf=False)))

    diff_mean = self.normalize_images(collapse_last(np.abs(mean_image - output)))
    xent_mean = self.normalize_images(
        collapse_last(xent_loss(pred=mean_image, label=inp, tf=False)))

    target = self.path_for("animation", updater, ext=None)

    # The four returned values are unpacked (as before) but not used further.
    _fig, _axes, _anim, _path = animate(
        inp, output, diff.astype('f'), xent.astype('f'),
        mean_image, diff_mean.astype('f'), xent_mean.astype('f'),
        figsize=figsize, path=target, square_grid=False)

    plt.close()
import tensorflow as tf
import numpy as np
import os

from dps import cfg
from dps.config import DEFAULT_CONFIG
from dps.utils import Config
from dps.utils.tf import uninitialized_variables_initializer
import auto_yolo.algs as algs

# Layer the configs: a copy of the defaults, then the yolo_air algorithm
# settings, then a fixed black background.
config = DEFAULT_CONFIG.copy()
config.update(algs.yolo_air_config)
config.update(background_cfg=Config(mode="colour", colour="black"), )

# Observation shape reported by the stand-in environment below.
image_shape = (48, 48, 3)
load_path = ""


class Environment:
    """Minimal stand-in environment exposing only ``obs_shape``."""

    @property
    def obs_shape(self):
        # Always the module-level `image_shape`.
        return image_shape


env = Environment()


class Updater:
    """Empty placeholder class."""
    pass
def _search_plot_cmd(
        path, y_field, x_field, groupby, spread_measure, style,
        do_legend=False, **axes_kwargs):
    """Plot ``y_field`` against ``x_field`` for a stored hyper-parameter search.

    The summary data of the search at ``path`` is grouped by ``groupby``; for
    each group the mean of ``y_field`` per ``x_field`` value is drawn on a
    log-2 x axis with error bars.  ``spread_measure`` selects the error bars:
    ``'std_dev'``, ``'conf_int'`` (95% confidence interval) or ``'std_err'``;
    anything else raises ``Exception``.  The figure is written to
    ``value_plot.pdf``.  Extra keyword arguments go to ``plt.axes``.
    """
    path = process_path(path)
    print("Plotting searches stored at {}.".format(path))

    search = HyperSearch(path)

    with plt.style.context(style):
        ax = plt.axes(xlabel=x_field, ylabel=y_field, **axes_kwargs)

        # Loaded and wrapped as before; the result is not used afterwards.
        dist = Config(search.objects.load_object('metadata', 'dist'))

        summary = search.extract_summary_data()
        grouped = sorted(summary.groupby(groupby))

        palette = plt.rcParams['axes.prop_cycle'].by_key()['color']

        legend = []
        for idx, (group_key, group_df) in enumerate(grouped):
            per_x = list(group_df.groupby(x_field))
            x = [entry[0] for entry in per_x]
            ys = [entry[1][y_field] for entry in per_x]
            y = [series.mean() for series in ys]

            if spread_measure == 'std_dev':
                y_upper = y_lower = [series.std() for series in ys]
            elif spread_measure == 'conf_int':
                intervals = [confidence_interval(series.values, 0.95) for series in ys]
                y_lower = y - np.array([bounds[0] for bounds in intervals])
                y_upper = np.array([bounds[1] for bounds in intervals]) - y
            elif spread_measure == 'std_err':
                y_upper = y_lower = [standard_error(series.values) for series in ys]
            else:
                raise Exception("NotImplemented")

            yerr = np.vstack((y_lower, y_upper))

            colour = palette[idx % len(palette)]

            ax.semilogx(x, y, c=colour, basex=2)
            handle = ax.errorbar(x, y, yerr=yerr, c=colour)
            legend.append((handle, "{} = {}".format(groupby, group_key)))

        if do_legend:
            handles, labels = zip(*legend)
            ax.legend(
                handles, labels, loc='center left',
                bbox_to_anchor=(1.05, 0.5), ncol=1)

        # plt.subplots_adjust(
        #     left=0.09, bottom=0.13, right=0.7, top=0.93, wspace=0.05, hspace=0.18)

        filename = "value_plot.pdf"
        print("Saving plot as {}".format(filename))
        plt.savefig(filename)
# Algorithm settings collected into a single Config.  The updater is
# `core.Updater`; training stops on the minimum of `loss_reconstruction`.
alg_config = Config(
    get_updater=core.Updater,

    batch_size=32,
    lr_schedule=1e-4,
    optimizer_spec="adam",
    max_grad_norm=1.0,
    use_gpu=True,
    gpu_allow_growth=True,
    max_experiments=None,
    preserve_env=False,
    stopping_criteria="loss_reconstruction,min",
    threshold=-np.inf,
    load_path=-1,
    start_tensorboard=5,

    max_steps=int(3e5),
    patience=50000,
    render_step=10000,
    eval_step=1000,
    display_step=1000,

    # A single curriculum stage with no per-stage overrides.
    curriculum=[dict()],

    tile_shape=(48, 48),
    n_samples_per_image=4,
    postprocessing="",

    fixed_weights="",
    fixed_values=dict(),
    no_gradient="",

    attr_prior_mean=0.0,
    attr_prior_std=1.0,
    z_prior_mean=0.0,
    z_prior_std=1.0,
    A=50,
    n_objects_per_cell=1,

    train_reconstruction=True,
    train_kl=True,
    reconstruction_weight=1.0,
    kl_weight=1.0,

    math_weight=0.0,
    train_math=False,
    math_A=None,

    noisy=True,

    # The encoder builder is called with a scope and returns an MLP with
    # n_units=[10, 10]; the decoder builder is IdentityFunction itself.
    build_background_encoder=lambda scope: MLP(n_units=[10, 10], scope=scope),
    build_background_decoder=IdentityFunction,
    max_possible_objects=None)
def get_env_config(task, size=14, in_colour=False, ops="addition", image_size="normal", **_):
    """Build the environment config for the named ``task``.

    Parameters
    ----------
    task: str
        Name of the task.  ``"xo"``, ``"collect"`` and ``"collect_rl"`` return
        immediately with their own config; every other task starts from a
        copy of ``grid_config``.
    size: int or str
        Converted with ``int``; only 14 and 21 are accepted.  Not used by the
        early-returning tasks nor by ``"grid"``.
    in_colour: bool
        If true, ``colours`` is set to a fixed six-colour string.
    ops: str
        Used by the arithmetic-style tasks: ``"addition"`` selects the
        ``"sum"`` reduction, anything else the four-way reduction string.
        For ``task == "arithmetic"`` it is also appended to ``env_name``.
    image_size: str
        Only read for ``task == "grid"`` (``"small"`` or ``"pretrain"``
        apply overrides; other values leave the grid config as is).
    **_
        Extra keyword arguments are accepted and ignored.

    Raises
    ------
    Exception
        For an unsupported ``size`` or an unknown ``task``.
    """
    # --- tasks that do not build on grid_config ---
    if task == "xo":
        return env_config.copy(
            env_name="xo",
            # build_env=Nips2018XO,
            one_hot=True,
            image_shape=(72, 72),
            postprocessing="",
            max_entities=30,
            max_episode_length=100,
            n_train=1000,
            n_val=1000,
            balanced=True,
            classes=[-1, 0, 1],
            n_actions=4,
            backgrounds="",
            backgrounds_sample_every=False,
            background_colours="",
            background_cfg=dict(mode="colour", colour="black"),
            eval_step=1000,
            display_step=1000,
            render_step=5000,
        )
    elif task == "collect":
        config = env_config.copy(collect.config)
        # Clear the hooks and exploration schedule taken over from collect.config.
        config.render_hook = None
        config.hooks = []
        config.exploration_schedule = None
        config.update(env_name="collect",
                      build_env=Nips2018Collect,
                      n_train=25000,
                      n_val=1000,
                      keep_prob=0.25,
                      background_cfg=dict(mode="colour", colour="white"))
        return config
    elif task == "collect_rl":
        return Config(env_name="collect_rl")

    # --- tasks that build on grid_config ---
    config = grid_config.copy()
    config.env_name = "task={}".format(task)

    if task == "arithmetic":
        config.env_name += "_ops={}".format(ops)

    if in_colour:
        config.colours = "red blue green cyan yellow magenta"

    if task == "grid":
        if image_size == "small":
            config.update(
                min_chars=1,
                max_chars=4,
                image_shape=(50, 50),
                grid_shape=(3, 3),
                spacing=(0, 0),
                random_offset_range=(8, 8),
                colours="",
            )
        elif image_size == "pretrain":
            config.update(
                min_chars=1,
                max_chars=1,
                image_shape=(15, 15),
                grid_shape=(1, 1),
                spacing=(-3, -3),
                random_offset_range=(1, 1),
            )
        # The grid task returns here, before `size` is validated.
        return config

    # --- size-dependent settings shared by the remaining tasks ---
    size = int(size)
    if size == 14:
        config.update(
            max_overlap=14 * 14 / 2,
            min_chars=15,
            max_chars=15,
        )
    elif size == 21:
        config.update(
            max_overlap=21 * 21 / 2,
            min_chars=12,
            max_chars=12,
            patch_size_std=0.05,
        )
    else:
        raise Exception("Unknown size {}".format(size))

    # --- task-specific settings ---
    if task == "scatter":
        config.build_env = Nips2018Scatter
    elif task == "arithmetic":
        config.update(
            build_env=Nips2018Arithmetic,
            image_shape=(48, 48),
            min_digits=1,
            max_digits=9,
            n_classes=82,
            largest_digit=81,
            # max_digits=6,
            # n_classes=55,
            # largest_digit=54,
            one_hot=True,
            reductions="sum" if ops == "addition" else "A:sum,N:min,X:max,C:len",
        )
    elif task == "arithmetic2":
        config.update(
            build_env=Nips2018Arithmetic,
            image_shape=(48, 48),
            min_digits=5,
            max_digits=5,
            n_classes=46,
            largest_digit=45,
            # max_digits=6,
            # n_classes=55,
            # largest_digit=54,
            one_hot=True,
            reductions="sum" if ops == "addition" else "A:sum,N:min,X:max,C:len",
        )
    elif task == "small":
        config.update(
            build_env=Nips2018Arithmetic,
            image_shape=(24, 24),
            min_digits=1,
            max_digits=1,
            n_classes=10,
            largest_digit=9,
            # max_digits=6,
            # n_classes=55,
            # largest_digit=54,
            one_hot=True,
            reductions="sum" if ops == "addition" else "A:sum,N:min,X:max,C:len",
        )
    elif task == "shapes":
        config.update(
            build_env=Nips2018Shapes,
            image_shape=(48, 48),
            shapes="green,circle blue,circle orange,circle teal,circle red,circle black,circle",
            background_colours="white",
        )
    elif task == "shapes_qa":
        colours = "red blue green".split()
        shapes = "circle triangle x".split()
        # Every "<colour>,<shape>" combination of the two lists above.
        distractor_shapes = [
            "{},{}".format(c, s) for c, s in itertools.product(colours, shapes)
        ]
        hook_kwargs = dict(plot_step=100, n=100, initial=True)
        config.update(build_env=Nips2018ShapesQA,
                      n_classes=2,
                      distractor_shapes=distractor_shapes,
                      n_distractor_shapes=None,
                      image_shape=(48, 48),
                      background_colours="white",
                      hooks=[
                          EvalHook(BlueXAboveRedCircle,
                                   dataset_kwargs=dict(seed=3, n_distractor_shapes=0),
                                   **hook_kwargs)
                      ])
    elif task == "set":
        config.update(
            build_env=Nips2018Set,
            n_classes=2,
            n_distractor_shapes=None,
            image_shape=(48, 48),
            colours="red green blue",
            shapes="circle square diamond",
            digits="simple1 simple2 simple3",
            digit_colour="black",
            n_cards=7,
            set_size=3,
            n_train=128000,
            max_overlap=14 * 14 / 3,
            background_colours="cyan magenta yellow",
            background_cfg=dict(mode="learn", A=3),
        )
    elif task == "clevr":
        config.update(
            build_env=Nips2018Clevr,
            image_shape=(80, 120),
        )
    elif task == "atari":
        config.update(
            build_env=Nips2018Atari,
            history_length=1,
            store_o=True,
            store_r=False,
            store_a=False,
            store_next_o=False,
            after_warp=False,
            max_samples_per_ep=100,
            train_episode_range=(None, -2),
            val_episode_range=(-2, -1),
            test_episode_range=(-1, None),
            background_cfg=dict(mode="learn_solid"),
        )
    else:
        raise Exception("Unknown task `{}`".format(task))
    return config
class Nips2018Collect(Environment): def __init__(self): train_seed, val_seed, test_seed = 0, 1, 2 env = collect.build_env().gym_env train = GameDataset(env=env, n_examples=cfg.n_train, seed=train_seed) val = GameDataset(env=env, n_examples=cfg.n_val, seed=val_seed) test = GameDataset(env=env, n_examples=cfg.n_val, seed=test_seed) self.datasets = dict(train=train, val=val, test=test) env_config = Config( train_episode_range=(0.0, 0.8), val_episode_range=(0.8, 0.9), test_episode_range=(0.9, 1.0), n_frames=0, ) grid_config = env_config.copy( env_name="nips_2018_grid", build_env=Nips2018Grid, # dataset params min_chars=16, max_chars=25, n_patch_examples=0, image_shape=(6 * 14, 6 * 14), patch_shape=(14, 14), characters=list(range(10)), patch_size_std=0.0,
import numpy as np

from dps.register import RegisterBank
from dps.env import TensorFlowEnv
from dps.utils import Param, Config


def build_grid():
    """Return a new ``Grid`` environment."""
    return Grid()


# Default settings for the grid environment; `build_env` points at the
# factory above and the curriculum has two stages with no overrides.
config = Config(
    build_env=build_grid,
    curriculum=[dict(), dict()],
    env_name='grid',
    restart_prob=0.0,
    l2l=False,
    shape=(5, 5),
    T=20,
    n_val=100,
)


class Grid(TensorFlowEnv):
    # Four action names obtained by splitting the string on whitespace.
    action_names = '^ > v <'.split()

    # Declared as `Param`s; `config` above carries an entry with the same
    # name for each of them.
    T = Param()
    shape = Param()
    restart_prob = Param()
    l2l = Param()
    n_val = Param()
import matplotlib.pyplot as plt
import numpy as np

from dps.utils import Config
from dps import cfg

# Script defaults; `cfg.update_from_command_line()` is called below once the
# config is active.
config = Config(
    prior_log_odds=-0.25,
    n_objects=32,
)

# NOTE(review): the original indentation was lost; everything that reads
# `cfg` is assumed to sit inside this `with` block - confirm.
with config:
    cfg.update_from_command_line()

    # Logistic sigmoid of the prior log-odds.
    failure_prob = 1. / (1. + np.exp(-cfg.prior_log_odds))
    print(failure_prob)

    # Probabilities (1 - p) * p**k over k = 0..n_objects, renormalised to sum
    # to one over that range.
    support = np.arange(cfg.n_objects + 1)
    raw_probs = (1 - failure_prob) * failure_prob**support
    probs = raw_probs / raw_probs.sum()
    print(probs)

    # A 2 x 1 grid of axes, each cell 3 x 3 inches.
    fig_height = 2
    fig_width = 1
    fig_unit_size = 3
    fig, axes = plt.subplots(fig_height, fig_width,
                             figsize=(fig_unit_size * fig_width, fig_unit_size * fig_height))
# Settings for the Atari dataset.  Commented-out entries are alternative
# values kept for quick switching; the uncommented entry is the active one.
config = Config(
    # atari_game="Berzerk",
    # atari_game="Venture",
    # atari_game="DemonAttack",
    # atari_game="Phoenix",
    # atari_game="AirRaid",
    atari_game="Pong",
    # atari_game="Assault",
    # atari_game="SpaceInvaders",
    # atari_game="Carnival",
    # atari_game="Asteroids",
    # atari_game="Asteroids",
    n_frames=16,
    image_shape=None,
    after_warp=False,
    # episode_indices=slice(0, 2),
    episode_indices=slice(-2, None),
    # episode_indices=None,
    max_episodes=100,
    max_examples=200,
    sample_density=0.05,
    frame_skip=1,
    seed=201,
    N=16,

    n_dilate=1,
    n_erode=0,
    filter_size=2,
    allowed_colors_for_annotations=None,
    distance_threshold=10000,
    distance_ord=2,
    average_frames=False,

    # crop=(0, 195, 0, 160),  # For space invaders
    # crop=(30, 215, 0, 160),  # For carnival
    # crop=(0, 188, 0, 160),
    crop=None,

    tile_shape=(72, 72),
    postprocessing="",
    # postprocessing="random"
    get_annotations=True,
)
from dps.register import RegisterBank
from dps.env import TensorFlowEnv
from dps.utils import Param, Config


def build_env():
    """Return a new ``CliffWalk`` environment."""
    return CliffWalk()


# Default settings for the cliff-walk environment; training stops on the
# maximum of `reward_per_ep` with a threshold of 10.0.
config = Config(
    build_env=build_env,
    curriculum=[dict()],
    env_name='cliff_walk',
    T=20,
    n_states=10,
    order=None,
    n_actions=2,
    stopping_criteria="reward_per_ep,max",
    threshold=10.0,
)


class CliffWalk(TensorFlowEnv):
    """ An abstraction of the classic cliff walk domain.

    The agent needs to choose the exact correct sequence of actions. Any
    other action sends it back to the initial state, where it starts again.
    Choosing the correct action advances it to the next state. A reward is
    received for choosing the correct action in the final state.

    """
    T = Param(help="Number of timesteps")
def run_experiment(name, base_config, readme, distributions=None, durations=None,
                   name_variables=None, alg_configs=None, env_configs=None,
                   late_config=None, cl_mode='lax', run_kwargs_base=None):
    """Assemble a config from its layers and run it locally or submit it.

    Config layers are applied in this order, later ones overriding earlier
    ones: default config, ``base_config``, the selected entry of
    ``env_configs``, the selected entry of ``alg_configs``, ``late_config``,
    the ``config`` entry of the run keyword arguments, and finally the
    command line (unless ``cl_mode`` is ``None``).

    Command line positionals: ``env`` (only if ``env_configs`` is given),
    ``alg`` (only if ``alg_configs`` is given) and an optional ``duration``
    (a key of ``durations`` or ``"local"``, the default).

    With duration ``"local"`` the training loop runs in-process and its result
    is returned.  Otherwise the experiment is handed to ``build_and_submit``
    and nothing is returned.

    ``readme`` is accepted but not used in this function.

    Raises
    ------
    Exception
        If ``cl_mode`` is not ``None``, ``'strict'`` or ``'lax'``.
    """
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    if env_configs is not None:
        parser.add_argument('env')
    if alg_configs is not None:
        parser.add_argument('alg')
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"], default="local", nargs="?")

    # Unknown arguments are left for the config's own command line handling.
    args, _ = parser.parse_known_args()

    config = get_default_config()
    config.update(base_config)

    if env_configs is not None:
        env_config = env_configs[args.env]
        config.update(env_config)

    if alg_configs is not None:
        alg_config = alg_configs[args.alg]
        config.update(alg_config)

    if late_config is not None:
        config.update(late_config)

    env_name = sanitize(config.get('env_name', ''))
    alg_name = sanitize(config.get("alg_name", ""))

    run_kwargs = Config(
        kind="slurm",
        ignore_gpu=False,
    )

    if run_kwargs_base is not None:
        run_kwargs.update(run_kwargs_base)

    # A 'local' entry in `durations` is optional; any other duration must exist.
    if args.duration == "local":
        run_kwargs.update(durations.get('local', {}))
    else:
        run_kwargs.update(durations[args.duration])

    # A 'config' entry is merged into the experiment config and removed from
    # the run keyword arguments.
    if 'config' in run_kwargs:
        config.update(run_kwargs.config)
        del run_kwargs['config']

    if cl_mode is not None:
        if cl_mode == 'strict':
            config.update_from_command_line(strict=True)
        elif cl_mode == 'lax':
            config.update_from_command_line(strict=False)
        else:
            raise Exception("Unknown value for cl_mode: {}".format(cl_mode))

    if args.duration == "local":
        config.exp_name = "alg={}".format(alg_name)
        with config:
            return training_loop()
    else:
        # A 'distributions' entry in the run keyword arguments replaces the
        # `distributions` argument.
        if 'distributions' in run_kwargs:
            distributions = run_kwargs['distributions']
            del run_kwargs['distributions']

        if name_variables is not None:
            name_variables_str = "_".join(
                "{}={}".format(sanitize(k), sanitize(getattr(config, k)))
                for k in name_variables.split(","))
            env_name = "{}_{}".format(env_name, name_variables_str)

        # NOTE(review): the original indentation was lost; this assignment is
        # assumed to run whether or not `name_variables` is given - confirm.
        config.env_name = env_name

        exp_name = "env={}_alg={}_duration={}".format(env_name, alg_name, args.duration)

        init()

        build_and_submit(name, exp_name, config, distributions=distributions, **run_kwargs)
import numpy as np from dps.register import RegisterBank from dps.env import TensorFlowEnv from dps.utils import Param, Config def build_env(): return GridBandit() config = Config( build_env=build_env, curriculum=[dict()], env_name='grid_bandit', threshold=-5, T=5, shape=(2, 2), n_arms=10, ) class GridBandit(TensorFlowEnv): """ Agent starts off in random location. Agent can move around the grid, and can perform a `look` action to reveal an integer stored at its current location in the grid. It can also pull a number of arms determined by `n_arms` (these arms can be pulled anywhere, they have no spatial location). Also, one arm is pulled at all times; taking an action to pull one of the arms persistently changes the arm the agent is taken to be pulling. The integer stored in the top left location gives the identity of the correct arm. The optimal strategy for an episode is to move to the top-left corner, perform the `look` action, and then pull the correct arm thereafter.
import numpy as np from dps.utils import Config from auto_yolo import envs readme = "xo convolutional experiment" config = Config() distributions = dict(n_train=100 * 2**np.arange(8)) durations = dict(long=dict(max_hosts=1, ppn=6, cpp=2, gpu_set="0,1", wall_time="12hours", project="rpp-bengioy", cleanup_time="10mins", slack_time="10mins", n_repeats=6, step_time_limit="12hours"), med=dict(max_hosts=1, ppn=3, cpp=2, gpu_set="0", wall_time="30mins", project="rpp-bengioy", cleanup_time="2mins", slack_time="2mins", n_repeats=3), short=dict(max_hosts=1, ppn=3,
from dps.register import RegisterBank
from dps.env import TensorFlowEnv
from dps.utils import Param, Config


def build_env():
    """Return a new ``PathDiscovery`` environment."""
    return PathDiscovery()


# Default settings for the path-discovery environment.  Each curriculum stage
# overrides `shape` and `threshold`.
config = Config(
    build_env=build_env,
    curriculum=[
        dict(shape=(2, 2), threshold=6),
        dict(shape=(3, 3), threshold=4),
        dict(shape=(4, 4), threshold=2)
    ],
    env_name='path_discovery',
    shape=(3, 3),
    T=10,
    stopping_criteria="reward_per_ep,max",
)


class PathDiscovery(TensorFlowEnv):
    """ The top-left cell stores an integer which says which of the other 3 corners is the rewarding corner.
        Agents use the "look" action to see which integer is present at the current cell.

    """
    T = Param()
    shape = Param()
    n_val = Param()
def run_experiment(name, config, readme, distributions=None, durations=None,
                   alg=None, task="grid", name_variables=None, env_kwargs=None):
    """Assemble a config for ``task``/``alg`` and run it locally or submit it.

    Config layers are applied in this order, later ones overriding earlier
    ones: ``DEFAULT_CONFIG``, the environment config returned by
    ``get_env_config(task=task, **env_kwargs)``, the ``<alg>_config`` attribute
    of ``alg_module`` (if ``alg`` is given), ``config``, and the command line.
    For a non-local duration, the ``config`` entry of the selected
    ``durations`` item is applied last.

    The single command line positional ``duration`` must be a key of
    ``durations`` or ``"local"``.  With ``"local"`` the training loop runs
    in-process and its result is returned.  Otherwise the experiment is handed
    to ``build_and_submit`` and nothing is returned.

    Neither ``env_kwargs`` nor ``durations`` is modified.  ``readme`` is
    accepted but not used in this function.
    """
    name = sanitize(name)
    durations = durations or {}

    parser = argparse.ArgumentParser()
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"])

    # Unknown arguments are left for the config's own command line handling.
    args, _ = parser.parse_known_args()

    _config = DEFAULT_CONFIG.copy()

    # Copy before adding `task` so the caller's dict is left untouched.
    env_kwargs = dict(env_kwargs or {})
    env_kwargs['task'] = task
    env_config = get_env_config(**env_kwargs)
    _config.update(env_config)

    if alg:
        alg_config = getattr(alg_module, "{}_config".format(alg))
        _config.update(alg_config)
        alg_name = sanitize(alg_config.alg_name)
    else:
        alg_name = ""

    _config.update(config)
    _config.update_from_command_line()

    _config.env_name = "{}_env={}".format(name, sanitize(env_config.env_name))

    if args.duration == "local":
        _config.exp_name = "alg={}".format(alg_name)
        with _config:
            return training_loop()
    else:
        run_kwargs = Config(
            kind="slurm",
            pmem=5000,
            ignore_gpu=False,
        )

        duration_args = durations[args.duration]

        if 'config' in duration_args:
            _config.update(duration_args['config'])

        # Strip 'config' from our own `run_kwargs` rather than deleting it from
        # the caller's `durations` entry.
        run_kwargs.update(duration_args)
        if 'config' in run_kwargs:
            del run_kwargs['config']
        run_kwargs.update_from_command_line()

        if name_variables is not None:
            name_variables_str = "_".join(
                "{}={}".format(sanitize(str(k)), sanitize(str(getattr(_config, k))))
                for k in name_variables.split(","))
            _config.env_name = "{}_{}".format(_config.env_name, name_variables_str)

        exp_name = "{}_alg={}_duration={}".format(_config.env_name, alg_name, args.duration)

        build_and_submit(name=exp_name, config=_config, distributions=distributions, **run_kwargs)
# Default settings for the A2C updater.  The policy and controller builders
# are instantiated here, once, when the module is imported.
config = Config(
    exp_name="A2C",
    get_updater=A2C,
    n_controller_units=64,
    batch_size=16,
    n_val_rollouts=100,
    optimizer_spec="adam",
    opt_steps_per_update=1,
    sub_batch_size=0,
    epsilon=None,
    lr_schedule=1e-4,

    exploration_schedule=0.1,
    val_exploration_schedule=0.0,

    value_weight=1.0,
    value_epsilon=None,
    value_n_samples=0,
    value_direct=False,
    value_reg_weight=0.0,

    build_policy=BuildEpsilonSoftmaxPolicy(),
    build_controller=BuildLstmController(),

    policy_weight=1.0,
    entropy_weight=0.01,

    split=False,
    q_lmbda=1.0,
    v_lmbda=1.0,
    policy_importance_c=0,
    q_importance_c=None,
    v_importance_c=None,
    max_grad_norm=None,
    gamma=1.0,

    use_differentiable_loss=False,
    render_n_rollouts=4,
)
cfg.load_path = { "network/representation": os.path.join(paths[0], "weights", "best_of_stage_0") } config = Config( max_steps=2e5, patience=20000, idx=0, repeat=0, prepare_func=prepare_func, math_input_network=IdentityFunction, # recurrent n_recurrent_units=256, # obj / arn n_repeats=1, d=256, symmetric_op="max", layer_norm=True, use_mask=True, decoder_kind="mlp", ) stage1_paths = dict( yolo_air= "/media/data/dps_data/logs/set_env=task=set/exp_alg=yolo-air_seed=1947474600_2018_08_29_19_37_34" )
codec='hevc', extra_args=['-preset', 'ultrafast']) plt.show() plt.close(fig) if __name__ == "__main__": from dps import cfg from dps.utils import Config config = Config( dataset_name='rooms_ring_camera', # dataset_name='rooms_free_camera_no_object_rotations', data_root_path='/media/data/gqn-dataset', file_range=(2, 3), mode='train', n_examples=100, read_batch_size=2, n_frames=10, N=16, ) config.update_from_command_line() with config: dset = GQN_Dataset(_no_cache=True) sess = tf.Session() with sess.as_default(): dset.visualize(cfg.N)
def build_policy(env, **kwargs):
    """Build the policy for the room environment.

    The action distribution is a product of two ``Normal`` components, plus a
    ``Gamma`` component when ``cfg.room_angular`` is true.  Extra keyword
    arguments are forwarded to ``Policy``.
    """
    if cfg.room_angular:
        action_selection = ProductDist(Normal(), Normal(), Gamma())
    else:
        action_selection = ProductDist(Normal(), Normal())
    return Policy(action_selection, env.obs_shape, **kwargs)


# Default settings for the room environment.
config = Config(
    build_env=build_env,
    n_controller_units=128,
    build_policy=build_policy,
    env_name='room',
    T=20,
    restart_prob=0.0,
    max_step=0.3,
    room_angular=False,
    l2l=False,
    reward_radius=0.5,
    n_val=100,
)


class Room(TensorFlowEnv):
    action_names = ['delta_x', 'delta_y']

    # Declared as `Param`s; `config` above carries an entry with the same
    # name for each of them.
    T = Param()
    reward_radius = Param()
    max_step = Param()
    restart_prob = Param()
from collections import defaultdict, OrderedDict
import matplotlib.pyplot as plt

from dps import cfg
from dps.hyper import HyperSearch
from dps.train import FrozenTrainingLoopData
from dps.utils import (
    process_path, Config, sha_cache, set_clear_cache,
    confidence_interval, standard_error, grid_subplots
)

# Hard-coded, machine-specific locations for the data, cache and figures.
data_dir = "/media/data/Dropbox/experiment_data/active/aaai_2020/"
cache_dir = process_path('/home/eric/.cache/dps_plots')
plot_dir = '/media/data/Dropbox/writeups/spatially_invariant_air/aaai_2020/figures'

plot_paths = Config()
plot_paths[''] = ''

verbose_cache = True


def std_dev(ys):
    """Return the per-element standard deviations of ``ys``, twice.

    The same list object is returned as both the upper and the lower spread.
    """
    y_upper = y_lower = [_y.std() for _y in ys]
    return y_upper, y_lower


def ci95(ys):
    """Compute 95% confidence-interval spreads around the row means of ``ys``."""
    # NOTE(review): `np` is not imported in the visible part of this module and
    # no `return` statement is visible here; the definition may continue
    # beyond this view - confirm.
    conf_int = [confidence_interval(_y, 0.95) for _y in ys]
    y = ys.mean(axis=1)
    y_lower = y - np.array([ci[0] for ci in conf_int])
    y_upper = np.array([ci[1] for ci in conf_int]) - y
def __call__(self, updater):
    """Render a static summary figure and an animation for ``updater``.

    The fetched values are wrapped in an ``AttrDict``.  Two outputs are
    produced:

    * ``static``: a grid with one row per timestep and two columns per
      example (input, then reconstruction with bounding boxes), saved with
      ``self.savefig``.
    * ``moving``: an animation over timesteps with input, reconstruction and
      one row per object slot, saved as an mp4 via ffmpeg and then copied to
      ``latest_stage<idx>.mp4`` in the same directory.
    """
    fetched = self._fetch(updater)
    fetched = Config(fetched)
    self._prepare_fetched(updater, fetched)
    o = AttrDict(**fetched)

    # `o.inp` is unpacked as 5-dimensional; the first two sizes are used as
    # the number of examples (N) and the number of timesteps (T).
    N, T, image_height, image_width, _ = o.inp.shape

    # --- static ---

    fig_width = 2 * N
    fig_height = T
    figsize = self.fig_scale * np.asarray((fig_width, fig_height))
    fig, axes = plt.subplots(fig_height, fig_width, figsize=figsize)
    fig.suptitle("n_updates={}".format(updater.n_updates), fontsize=20, fontweight='bold')
    axes = axes.reshape((fig_height, fig_width))

    # One colour per non-negative object id; a negative smallest id is dropped
    # from the list and id -1 is mapped to black afterwards.
    unique_ids = [int(i) for i in np.unique(o.obj_id)]
    if unique_ids[0] < 0:
        unique_ids = unique_ids[1:]

    color_by_id = {i: c for i, c in zip(unique_ids, itertools.cycle(self._BBOX_COLORS))}
    color_by_id[-1] = 'k'

    cmap = self._cmap(o.inp)
    for t, ax in enumerate(axes):
        for n in range(N):
            pres_time = o.presence[n, t, :]
            obj_id_time = o.obj_id[n, t, :]
            self.imshow(ax[2 * n], o.inp[n, t], cmap=cmap)

            # Title: rounded sum of presences, then "<colour><id>" for every
            # id greater than -1.
            n_obj = str(int(np.round(pres_time.sum())))
            id_string = ('{}{}'.format(color_by_id[int(i)], i) for i in o.obj_id[n, t] if i > -1)
            id_string = ', '.join(id_string)
            title = '{}: {}'.format(n_obj, id_string)

            ax[2 * n + 1].set_title(title, fontsize=6 * self.fig_scale)
            self.imshow(ax[2 * n + 1], o.canvas[n, t], cmap=cmap)
            for i, (p, o_id) in enumerate(zip(pres_time, obj_id_time)):
                c = color_by_id[int(o_id)]
                # Only draw a box for slots with presence above 0.5.
                if p > .5:
                    r = patches.Rectangle(
                        (o.left[n, t, i], o.top[n, t, i]),
                        o.width[n, t, i],
                        o.height[n, t, i],
                        linewidth=self.linewidth,
                        edgecolor=c,
                        facecolor='none')
                    ax[2 * n + 1].add_patch(r)

    for n in range(N):
        axes[0, 2 * n].set_ylabel('gt #{:d}'.format(n))
        axes[0, 2 * n + 1].set_ylabel('rec #{:d}'.format(n))

    for ax in axes.flatten():
        # ax.grid(False)
        # ax.set_xticks([])
        # ax.set_yticks([])
        ax.set_axis_off()

    self.savefig('static', fig, updater)

    # --- moving ---

    fig_width = 2 * N
    n_objects = o.obj_id.shape[2]
    fig_height = n_objects + 2
    figsize = self.fig_scale * np.asarray((fig_width, fig_height))
    fig, axes = plt.subplots(fig_height, fig_width, figsize=figsize)
    title_text = fig.suptitle('', fontsize=10)
    axes = axes.reshape((fig_height, fig_width))

    def func(t):
        # Draw frame `t`: rows 0 and 1 hold input and reconstruction, rows 2+
        # hold one glimpse per object slot.
        title_text.set_text("t={}, n_updates={}".format(t, updater.n_updates))

        for i in range(N):
            self.imshow(axes[0, 2*i], o.inp[i, t], cmap=cmap, vmin=0, vmax=1)
            self.imshow(axes[1, 2*i], o.canvas[i, t], cmap=cmap, vmin=0, vmax=1)

            for j in range(n_objects):
                if o.presence[i, t, j] > .5:
                    c = color_by_id[int(o.obj_id[i, t, j])]
                    r = patches.Rectangle(
                        (o.left[i, t, j], o.top[i, t, j]),
                        o.width[i, t, j],
                        o.height[i, t, j],
                        linewidth=self.linewidth,
                        edgecolor=c,
                        facecolor='none')
                    axes[1, 2*i].add_patch(r)

                ax = axes[2+j, 2*i]

                # The glimpse is multiplied by its presence value before display.
                self.imshow(ax, o.presence[i, t, j] * o.glimpse[i, t, j], cmap=cmap)
                title = '{:d} with p({:d}) = {:.02f}, id = {}'.format(
                    int(o.presence[i, t, j]), i + 1, o.presence_prob[i, t, j], o.obj_id[i, t, j])
                ax.set_title(title, fontsize=4 * self.fig_scale)

                # Outline present glimpses in the colour of their object id.
                if o.presence[i, t, j] > .5:
                    c = color_by_id[int(o.obj_id[i, t, j])]
                    for spine in 'bottom top left right'.split():
                        ax.spines[spine].set_color(c)
                        ax.spines[spine].set_linewidth(2.)

    # NOTE(review): the original indentation was lost; the tick and label setup
    # below is assumed to run once, outside `func` - confirm against the
    # original file.
    for ax in axes.flatten():
        ax.xaxis.set_ticks([])
        ax.yaxis.set_ticks([])

    axes[0, 0].set_ylabel('ground-truth')
    axes[1, 0].set_ylabel('reconstruction')

    for j in range(n_objects):
        axes[j+2, 0].set_ylabel('glimpse #{}'.format(j + 1))

    plt.subplots_adjust(left=0.02, right=.98, top=.95, bottom=0.02, wspace=0.1, hspace=0.15)

    anim = animation.FuncAnimation(fig, func, frames=T, interval=500)

    path = self.path_for('moving', updater, ext="mp4")
    anim.save(path, writer='ffmpeg', codec='hevc', extra_args=['-preset', 'ultrafast'])

    plt.close(fig)

    # Keep a copy of the newest animation under a per-stage file name.
    shutil.copyfile(
        path,
        os.path.join(
            os.path.dirname(path),
            'latest_stage{:0>4}.mp4'.format(updater.stage_idx)))
class IrisEnvironment(Environment):
    """Environment whose train, val and test datasets are all the iris ``train`` split."""

    def __init__(self):
        splits, _info = tfds.load('iris:2.*.*', with_info=True)

        # All three roles read the 'train' entry of the loaded splits.
        train = splits['train']
        val = splits['train']
        test = splits['train']

        train.n_classes = 3

        self.datasets = {'train': train, 'val': val, 'test': test}


def iris_config_func():
    """Return the iris environment settings as a plain dictionary."""
    return {
        'env_name': 'iris',
        'eval_step': 1000,
        'display_step': -1,
        'checkpoint_step': -1,
        'weight_step': -1,
        'backup_step': -1,
        'shuffle_buffer_size': 10000,
        'build_env': IrisEnvironment,
    }


iris_config = Config(iris_config_func())


# Independent copy of the shared MLP settings, pointed at this module's Updater.
mlp_config = copy.deepcopy(_mlp_config)
mlp_config['get_updater'] = Updater
def test_simple_add(test_config):
    """Train A2C on simple addition, then reload the saved weights twice.

    The test first trains through the three-stage curriculum and checks the
    final ``best_01_loss`` against a threshold.  It then reloads the best
    weights of the last stage and checks the loss again, once with further
    training on the last stage only and once with ``do_train`` disabled.
    """
    # Fully specify the config here so that this test is not affected by config changes external to this file.
    config = Config(
        env_name="test_simple_add_a2c",
        name="test_simple_add_a2c",
        get_updater=a2c.A2C,
        n_controller_units=32,
        batch_size=16,
        optimizer_spec="adam",
        opt_steps_per_update=20,
        sub_batch_size=0,
        epsilon=0.2,
        lr_schedule=1e-4,
        max_steps=501,

        build_policy=BuildEpsilonSoftmaxPolicy(),
        build_controller=BuildLstmController(),

        exploration_schedule=0.1,
        val_exploration_schedule=0.0,
        actor_exploration_schedule=None,

        policy_weight=1.0,
        value_weight=0.0,
        value_reg_weight=0.0,
        entropy_weight=0.01,

        split=False,
        q_lmbda=1.0,
        v_lmbda=1.0,
        policy_importance_c=0,
        q_importance_c=None,
        v_importance_c=None,
        max_grad_norm=None,
        gamma=1.0,

        use_differentiable_loss=False,

        use_gpu=False,
        display_step=500,
        seed=0,

        # env-specific
        build_env=simple_addition.build_env,
        T=30,
        curriculum=[
            dict(width=1),
            dict(width=2),
            dict(width=3),
        ],
        base=10,
        final_reward=True,
    )

    config.update(test_config)

    n_repeats = 1  # Haven't made it completely deterministic yet, so keep it at 1.

    # Counts how often each distinct (deterministic part of the) stdout occurred.
    results = defaultdict(int)

    threshold = 0.15

    for i in range(n_repeats):
        config = config.copy()
        output = _raw_run(config)
        stdout = output.path_for('stdout')
        result = _get_deterministic_output(stdout)
        results[result] += 1
        assert output.history[-1]['best_01_loss'] < threshold

    # More than one distinct output means the repeats disagreed.
    if len(results) != 1:
        for r in sorted(results):
            print("\n" + "*" * 80)
            print("The following occurred {} times:\n".format(results[r]))
            print(r)
        raise Exception("Results were not deterministic.")

    assert len(output.config.curriculum) == 3
    config.load_path = output.path_for('weights/best_of_stage_2')
    assert os.path.exists(config.load_path + ".index")
    assert os.path.exists(config.load_path + ".meta")

    # Load one of the hypotheses, train it for a bit, make sure the accuracy is still high.
    config.curriculum = [output.config.curriculum[-1]]
    config = config.copy()
    output = _raw_run(config)
    stdout = output.path_for('stdout')
    result = _get_deterministic_output(stdout)
    results[result] += 1
    assert output.history[-1]['best_01_loss'] < threshold

    # Load one of the hypotheses, don't train it at all, make sure the accuracy is still high.
    config.do_train = False
    config = config.copy()
    output = _raw_run(config)
    stdout = output.path_for('stdout')
    result = _get_deterministic_output(stdout)
    results[result] += 1
    assert output.history[-1]['best_01_loss'] < threshold
# Default settings for the collection game.  `entity_size` is defined outside
# this view and is shared by the agent and both entity specs.
config = Config(
    env_name="collection_game",
    build_env=build_env,

    T=20,
    max_episode_length=20,
    entropy_weight=0.0,
    batch_size=16,
    render_hook=Collection_RenderHook(N=16),
    render_step=1000,
    eval_step=100,
    display_step=100,
    stopping_criteria="reward_per_ep,max",
    threshold=1000,

    image_shape=(25, 25),
    background_colour="white",
    image_obs=False,
    max_entities=5,

    agent_spec=dict(appearance="plus", color="green", shape=entity_size),
    # Two entity kinds: blue circles with reward -1 and red squares with reward 1.
    entity_specs=[
        dict(appearance="circle", color="blue", shape=entity_size, reward=-1),
        dict(appearance="square", color="red", shape=entity_size, reward=1)
    ],
    min_entities=5,
    max_overlap=0.0,
    step_size=5,
    corner=None,
    grid=False,
    explore=False,
    discrete_actions=False,
)