def image_dataset(ds_dir, batch_size, image_size=None, norm_range=None, shuffle=True):
    def preprocess_image(image):
        image = tf.image.decode_jpeg(image, channels=3)
        if image_size:
            image = tf.image.resize(image, image_size)
        if norm_range:
            image = norm_image(image, norm_range)
        return image

    def load_and_preprocess_image(path):
        image = tf.read_file(path)
        return preprocess_image(image)

    if isinstance(ds_dir, list):
        all_image_paths = ds_dir
    else:
        ds_dir = Path(ds_dir)
        assert_colorize(ds_dir.is_dir(), f'Not a valid directory: {ds_dir}')
        all_image_paths = [str(f) for f in ds_dir.glob('*')]
    pwc(f'Total Images: {len(all_image_paths)}', 'magenta')

    ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
    if shuffle:
        ds = ds.shuffle(buffer_size=len(all_image_paths))
    ds = ds.map(load_and_preprocess_image,
                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds = ds.repeat()
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
    image = ds.make_one_shot_iterator().get_next('images')
    return ds, image
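
# Usage sketch (added for illustration, not part of the original module; the
# directory 'data/faces' is a hypothetical path and the module's imports are assumed):
def _demo_image_dataset():
    ds, images = image_dataset('data/faces', batch_size=32,
                               image_size=(64, 64), norm_range=(-1, 1))
    with tf.Session() as sess:
        return sess.run(images)    # numpy array of shape [32, 64, 64, 3]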
def snconvtrans(self, x, filters, kernel_size, strides, padding='same',
                use_bias=True, kernel_initializer=tc.layers.xavier_initializer(), name=None):
    name = self.get_name(name, 'snconvtrans')
    if isinstance(kernel_size, list):
        assert_colorize(len(kernel_size) == 2)
        k_h, k_w = kernel_size
    else:
        assert_colorize(isinstance(kernel_size, int))
        k_h = k_w = kernel_size
    B, H, W, _ = x.shape.as_list()
    # Compute the output shape
    if padding.lower() == 'valid':
        output_shape = [B, (H - 1) * strides + k_h, (W - 1) * strides + k_w, filters]
    else:
        output_shape = [B, H * strides, W * strides, filters]
        padding = 'SAME'    # treat all other forms of padding as 'same'
    with tf.variable_scope(name):
        w = tf.get_variable('weight', shape=[k_h, k_w, filters, x.shape[-1]],
                            initializer=kernel_initializer, regularizer=self.l2_regularizer)
        w = tf_utils.spectral_norm(w)
        x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape,
                                   strides=[1, strides, strides, 1], padding=padding.upper())
        if use_bias:
            b = tf.get_variable('bias', [filters], initializer=tf.zeros_initializer())
            x = tf.nn.bias_add(x, b)
    return x
def upsample_residual(self, x, filters, padding, sn, norm=tf.layers.batch_normalization,
                      activation=tf.nn.relu, name=None):
    """ Upsample a 4-D input tensor in a residual module, following this implementation:
    https://github.com/brain-research/self-attention-gan/blob/ad9612e60f6ba2b5ad3d3340ebae60f724636d75/generator.py#L78

    x: Input
    Caution: _reset_counter should be called first if this residual module is reused
    """
    assert_colorize(padding.lower() != 'valid')
    assert_colorize(len(x.shape.as_list()) == 4,
                    f'Input x should be a 4-D tensor, but got {x.shape.as_list()}')
    name = self.get_name(name, 'residual')
    y = x
    conv = self.snconv if sn else self.conv
    with tf.variable_scope(name):
        y = tf_utils.norm_activation(y, norm=norm, activation=activation,
                                     training=self.training, name='NormAct_1')
        y = self.upsample_conv(y, filters, 3, 1, padding=padding, sn=sn, name='UpsampleConv')
        y = tf_utils.norm_activation(y, norm=norm, activation=activation,
                                     training=self.training, name='NormAct_2')
        y = conv(y, filters, 3, 1, padding=padding, name='Conv')
        x = self.upsample_conv(x, filters, 1, 1, padding='VALID', sn=sn, name='UpsampleConv1x1')
        x = x + y
    return x
def merge(images, size):
    assert_colorize(len(images.shape) == 4,
                    f'images should be 4D, but got shape {images.shape}')
    h, w = images.shape[1], images.shape[2]
    image_type = images.dtype
    if images.shape[3] in (3, 4):
        c = images.shape[3]
        img = np.zeros((h * size[0], w * size[1], c), dtype=image_type)
        for idx, image in enumerate(images):
            i = idx % size[1]
            j = idx // size[1]
            img[j * h:j * h + h, i * w:i * w + w, :] = image
        if np.issubdtype(image_type, np.uint8):
            return img
        if np.min(img) < 0:
            # for images in range [-1, 1], rescale to [0, 1]
            img = (img + 1) / 2
        img = img_as_ubyte(img)
        return img
    elif images.shape[3] == 1:
        img = np.zeros((h * size[0], w * size[1]), dtype=image_type)
        for idx, image in enumerate(images):
            i = idx % size[1]
            j = idx // size[1]
            img[j * h:j * h + h, i * w:i * w + w] = image[:, :, 0]
        return img
    else:
        raise NotImplementedError
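
# Worked example (illustrative, not from the original code): tiling 6 uint8 RGB
# images into a 2x3 grid yields a single uint8 array of shape (2*h, 3*w, 3).
def _demo_merge():
    images = np.random.randint(0, 256, size=(6, 32, 32, 3), dtype=np.uint8)
    grid = merge(images, size=(2, 3))
    assert grid.shape == (64, 96, 3)
    return grid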
def _record_stats_impl(self, kwargs):
    if 'worker_no' not in kwargs:
        assert_colorize(len(self.stats) == 1,
                        'Specify worker_no for multi-worker logs')
        no = 0
    else:
        no = kwargs['worker_no']
        del kwargs['worker_no']
    # if global_step appears in kwargs, use it when adding the summary to TensorBoard
    if 'global_step' in kwargs:
        step = kwargs['global_step']
        del kwargs['global_step']
    else:
        step = None
    feed_dict = {}
    for k, v in kwargs.items():
        assert_colorize(k in self.stats[no], f'{k} is not a valid stats type')
        feed_dict.update({self.stats[no][k]: v})
    score_count, summary = self.sess.run(
        [self.stats[no]['counter'], self.stats[no]['log_op']],
        feed_dict=feed_dict)
    self.writer.add_summary(summary, step or score_count)
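
# Usage sketch (illustrative; assumes the stats ops for worker 0 include a
# 'score' entry): kwargs is passed as a plain dict, not as keyword arguments.
#   self._record_stats_impl(dict(worker_no=0, global_step=1000, score=123.4))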
def snconv(self, x, filters, kernel_size, strides=1, padding='same',
           use_bias=True, kernel_initializer=tc.layers.xavier_initializer(), name=None):
    """ Spectral-normalized convolutional layer """
    name = self.get_name(name, 'snconv')
    if isinstance(kernel_size, list):
        assert_colorize(len(kernel_size) == 2)
        H, W = kernel_size
    else:
        assert_colorize(isinstance(kernel_size, int))
        H = W = kernel_size
    with tf.variable_scope(name):
        if padding.lower() != 'same' and padding.lower() != 'valid':
            # explicitly pad x for non-standard padding modes, then convolve with 'valid'
            x = tf_utils.padding(x, kernel_size, strides, mode=padding)
            padding = 'valid'
        w = tf.get_variable('weight', shape=[H, W, x.shape[-1], filters],
                            initializer=kernel_initializer, regularizer=self.l2_regularizer)
        w = tf_utils.spectral_norm(w)
        x = tf.nn.conv2d(x, w, strides=(1, strides, strides, 1), padding=padding.upper())
        if use_bias:
            b = tf.get_variable('bias', [filters], initializer=tf.zeros_initializer())
            x = tf.nn.bias_add(x, b)
    return x
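
# Usage sketch (illustrative; assumes `self` is the layer container these
# methods belong to, built in TF1 graph mode):
#   x = tf.placeholder(tf.float32, [None, 64, 64, 3])
#   y = self.snconv(x, filters=64, kernel_size=4, strides=2)   # -> [None, 32, 32, 64]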
def merge(self, local_buffer, length):
    """ Merge a local buffer into the replay buffer, useful for distributed algorithms """
    assert_colorize(length < self.capacity,
                    f'Local buffer cannot be larger than the replay buffer: {length} vs. {self.capacity}')
    with self.locker:
        self._merge(local_buffer, length)
def gru(self, x, units, return_sequences=False):
    assert_colorize(len(x.shape.as_list()) == 3,
                    f'Input shape error: expected a tensor of 3 dimensions, got {len(x.shape.as_list())}')
    gru_cell = tk.layers.CuDNNGRU(units, return_sequences=return_sequences, return_state=True)
    initial_state = gru_cell.get_initial_state(x)
    x, final_state = gru_cell(x, initial_state=initial_state)
    return x, (initial_state, final_state)
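
# Usage sketch (illustrative): x must be [n_batch, n_seq, dim]; with
# return_sequences=True the call returns per-step outputs along with the
# initial and final hidden states.
#   y, (h0, hT) = self.gru(x, units=128, return_sequences=True)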
def sample(self):
    assert_colorize(self.good_to_learn,
                    'There are not sufficient transitions to start learning --- '
                    f'transitions in buffer: {len(self)}\t'
                    f'minimum required size: {self.min_size}')
    with self.locker:
        samples = self._sample()
    return samples
def _popitem(self, kwargs):
    assert_colorize(isinstance(kwargs, dict))
    while len(kwargs) != 0:
        k, v = kwargs.popitem()
        if not isinstance(v, list) and not isinstance(v, dict):
            v = [v]
        if len(v) != 0:
            break
    return deepcopy(k), deepcopy(v)
def save_image(images, path, size=None):
    assert_colorize(len(images.shape) == 4,
                    f'images should be 4D, but got shape {images.shape}')
    num_images = images.shape[0]
    if size is None:
        size = utils.squarest_grid_size(num_images)
    images = merge(images, size)
    utils.check_make_dir(path)
    imsave(path, images)
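
# Usage sketch (illustrative; 'results/sample.png' is a hypothetical path):
def _demo_save_image():
    images = np.random.randint(0, 256, size=(16, 32, 32, 3), dtype=np.uint8)
    # with size=None the grid is chosen by utils.squarest_grid_size(16),
    # presumably a 4x4 layout
    save_image(images, 'results/sample.png')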
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('logdir', nargs='*')
    parser.add_argument('--outdir', '-o')
    parser.add_argument('--legend', nargs='*')
    parser.add_argument('--legendtag', '-tag', default='Algo')
    parser.add_argument('--title')
    parser.add_argument('--x', default='Episodes', nargs='*')
    parser.add_argument('--y', default='ScoreMean', nargs='*')
    parser.add_argument('--timing', default=None, choices=['Train', 'Eval', None],
                        help='select timing to plot; both training and evaluation stats are plotted by default')
    args = parser.parse_args()

    # by default, assume this file is invoked via `python utility/plot.py`
    if len(args.logdir) != 1:
        dirs = [f'logs/{d}' for d in args.logdir]
    else:
        dirs = glob.glob(f'logs/{args.logdir[0]}/logs/GS*')

    # set up legends
    if args.legend:
        assert_colorize(len(args.legend) == len(dirs),
                        'Must give a legend title for each set of experiments.')
        legends = args.legend
    else:
        legends = [os.path.basename(path) for path in dirs]
        legends = [l[3:] if l.startswith('GS-') else l for l in legends]
    tag = args.legendtag

    pwc('Directories:')
    for d in dirs:
        pwc(f'\t{d}')
    pwc('Legends:')
    for l in legends:
        pwc(f'\t{l}')

    data = []
    for logdir, legend_title in zip(dirs, legends):
        data += get_datasets(logdir, tag, legend_title)

    xs = args.x if isinstance(args.x, list) else [args.x]
    ys = args.y if isinstance(args.y, list) else [args.y]
    for x in xs:
        for y in ys:
            outdir = f'results/{args.outdir}-{x}-{y}'
            plot_data(data, x, y, outdir, tag, args.title, args.timing)
def __init__(self, filename, capacity, batch_size, state_space, action_dim):
    self.memory = dict(state=np.zeros((capacity, *state_space)),
                       action=np.zeros((capacity, action_dim)))
    self.filename = filename
    self.batch_size = batch_size
    self.capacity = capacity
    self.idx = 0
    self.full = False
    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            data = pickle.loads(f.read())
            self.memory['state'] = data['state']
            self.memory['action'] = data['action']
            assert_colorize(len(self.memory['state']) == capacity)
            assert_colorize(len(self.memory['action']) == capacity)
            self.full = True
    assert_colorize(len(self.memory) == 2, 'Memory contains redundant data')
    assert_colorize(len(self.memory['state']) == len(self.memory['action']),
                    f"Inconsistent lengths. #state: {len(self.memory['state'])}\t"
                    f"#action: {len(self.memory['action'])}")
def copy_buffer(dest_buffer, dest_start, dest_end, orig_buffer, orig_start, orig_end, dest_keys=True):
    assert_colorize(dest_end - dest_start == orig_end - orig_start,
                    'Inconsistent lengths of dest_buffer and orig_buffer.')
    if dest_end - dest_start == 0:
        return
    for key in (dest_buffer if dest_keys else orig_buffer).keys():
        dest_buffer[key][dest_start:dest_end] = orig_buffer[key][orig_start:orig_end]
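
# Worked example (illustrative, not from the original code): copying a slice
# of transitions between two dict-of-arrays buffers with matching keys.
def _demo_copy_buffer():
    src = dict(state=np.arange(10).reshape(10, 1), action=np.ones((10, 1)))
    dst = dict(state=np.zeros((5, 1)), action=np.zeros((5, 1)))
    copy_buffer(dst, 0, 3, src, 4, 7)
    # dst['state'][:3] is now [[4], [5], [6]]
    return dst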
def __init__(self, args):
    assert_colorize('n_envs' in args, 'Please specify n_envs in args.yaml beforehand')
    n_envs = args['n_envs']
    self.envs = [gym.make(args['name']) for _ in range(n_envs)]
    for i, env in enumerate(self.envs):
        env.seed(args['seed'] + 10 * i)
    env = self.envs[0]
    self.state_space = env.observation_space.shape
    self.is_action_discrete = isinstance(env.action_space, gym.spaces.Discrete)
    self.action_space = env.action_space
    self.action_dim = env.action_space.n if self.is_action_discrete else env.action_space.shape[0]
    self.action_dist_type = action_dist_type(env)
    self.n_envs = n_envs
    self.max_episode_steps = int(float(args['max_episode_steps'])) if 'max_episode_steps' in args \
                             else env.spec.max_episode_steps
def log_tabular(self, key, val):
    """
    Log a value of some diagnostic.

    Call this only once for each diagnostic quantity, each iteration.
    After using ``log_tabular`` to store values for each diagnostic,
    make sure to call ``dump_tabular`` to write them out to file and
    stdout (otherwise they will not get saved anywhere).
    """
    if self.first_row:
        self.log_headers.append(key)
    else:
        assert_colorize(key in self.log_headers,
                        f"Trying to introduce a new key {key} that you didn't include in the first iteration")
    assert_colorize(key not in self.log_current_row,
                    f'You already set {key} this iteration. Maybe you forgot to call dump_tabular()')
    self.log_current_row[key] = val
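
# Usage sketch (illustrative): log each diagnostic once per iteration, then
# flush the row with dump_tabular.
#   logger.log_tabular('Iteration', it)
#   logger.log_tabular('AvgScore', avg_score)
#   logger.dump_tabular()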
def padding(x, kernel_size, strides, mode='constant', name=None):
    """
    Pad x so that a convolution with the same kernel_size and strides
    downsamples x by a factor of strides, based on the equation:
        W // S = (W - k_w + 2P) / S + 1
    """
    assert_colorize(mode.lower() in ('constant', 'reflect', 'symmetric'),
                    f'Padding should be "constant", "reflect", or "symmetric", but got {mode}.')
    H, W = x.shape.as_list()[1:3]
    if isinstance(kernel_size, list) and len(kernel_size) == 2:
        k_h, k_w = kernel_size
    else:
        k_h = k_w = kernel_size
    p_h1 = int(((H / strides - 1) * strides - H + k_h) // strides)
    p_h2 = int(((H / strides - 1) * strides - H + k_h) - p_h1)
    p_w1 = int(((W / strides - 1) * strides - W + k_w) // strides)
    p_w2 = int(((W / strides - 1) * strides - W + k_w) - p_w1)
    return tf.pad(x, [[0, 0], [p_h1, p_h2], [p_w1, p_w2], [0, 0]], mode, name=name)
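
# Worked example (illustrative): for H = W = 32, kernel_size = 4, strides = 2,
# the total padding per spatial dimension is (32/2 - 1)*2 - 32 + 4 = 2, so a
# subsequent 'valid' conv with kernel 4 and stride 2 maps 34 -> (34 - 4)/2 + 1 = 16 = 32 // 2.
def _demo_padding():
    x = tf.zeros([1, 32, 32, 3])
    y = padding(x, kernel_size=4, strides=2, mode='reflect')
    # y has shape [1, 34, 34, 3]
    return y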
def _record_stats_impl(self, kwargs):
    if 'worker_no' not in kwargs:
        assert_colorize(len(self.stats) == 1,
                        'Specify worker_no for multi-worker logs')
        no = 0
    else:
        no = kwargs['worker_no']
        del kwargs['worker_no']
    feed_dict = {}
    for k, v in kwargs.items():
        assert_colorize(k in self.stats[no], f'{k} is not a valid stats type')
        feed_dict.update({self.stats[no][k]: v})
    score_count, summary = self.sess.run(
        [self.stats[no]['counter'], self.stats[no]['log_op']],
        feed_dict=feed_dict)
    self.writer.add_summary(summary, score_count)
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('logdir')
    parser.add_argument('--outname', '-o')
    parser.add_argument('--legend', nargs='*')
    parser.add_argument('--x', default='Iteration', nargs='*')
    parser.add_argument('--y', default='AvgScore', nargs='*')
    args = parser.parse_args()

    # by default, assume this file is invoked via `python utility/plot.py`
    dirs = glob.glob(f'logs/{args.logdir}/*/GS-*')

    # set up legends
    if args.legend:
        assert_colorize(len(args.legend) == len(dirs),
                        'Must give a legend title for each set of experiments.')
        legends = args.legend
    else:
        legends = [os.path.basename(path)[3:] for path in dirs]

    pwc('Directories:')
    pwc(dirs)
    pwc('Legends:')
    pwc(legends)

    data = []
    for logdir, legend_title in zip(dirs, legends):
        data += get_datasets(logdir, legend_title)

    if isinstance(args.x, list) or isinstance(args.y, list):
        xs = args.x if isinstance(args.x, list) else [args.x]
        ys = args.y if isinstance(args.y, list) else [args.y]
        for x in xs:
            for y in ys:
                outpath = f'results/{args.outname}/{x}-{y}.png'
                plot_data(data, x, y, outpath)
    else:
        outpath = f'results/{args.outname}.png'
        plot_data(data, args.x, args.y, outpath)
def _change_args(self, **kwargs):
    if kwargs == {}:
        # base case
        old_model_name = self.agent_args['model_name']
        for i in range(1, self.n_trials + 1):
            if self.n_trials > 1:
                self.agent_args['model_name'] += f'/trial{i}'
            # arguments should be deep-copied here, otherwise args will be
            # reset if a subprocess runs after this loop
            self.env_args['seed'] = 10 * i
            p = Process(target=self.train_func,
                        args=(deepcopy(self.env_args), deepcopy(self.agent_args),
                              deepcopy(self.buffer_args), self.render))
            self.agent_args['model_name'] = old_model_name
            p.start()
            time.sleep(1)   # ensure subprocesses start in order
            self.processes.append(p)
    else:
        # recursive case
        kwargs_copy = deepcopy(kwargs)
        key, value = self._popitem(kwargs_copy)

        valid_args = None
        for args in [self.env_args, self.agent_args, self.buffer_args]:
            if key in args:
                assert_colorize(valid_args is None,
                                f'Conflict: found {key} in both {valid_args} and {args}!')
                valid_args = args
        err_msg = lambda k, v: f'Invalid Argument: {k}={v}'
        assert_colorize(valid_args is not None, err_msg(key, value))

        if isinstance(value, dict) and len(value) != 0:
            # For simplicity, we do not further consider the case where value is a dict of dicts
            k, v = self._popitem(value)
            assert_colorize(k in valid_args[key], err_msg(k, v))
            if len(value) != 0:
                # if there is still something left in value, put value back into kwargs
                kwargs_copy[key] = value
            self._safe_call(f'-{key}',
                            lambda: self._recursive_trial(valid_args[key], k, v, kwargs_copy))
        else:
            self._recursive_trial(valid_args, key, value, kwargs_copy)
def lstm_norm(self, xs, units, masks=None):
    """ LSTM with masks and layer normalization
    Reference code:
    https://github.com/openai/baselines/blob/8c2aea2addc9f3ba36d4a0c937e6a2d09830afc7/baselines/a2c/utils.py#L81

    Arguments:
        xs      3d Tensor   -- input data of shape [n_batch, n_seq, dim]
        units   int         -- size of hidden/cell state
        masks   2d Tensor   -- masks, must match the first 2 dimensions of xs

    Returns:
        ys      3d Tensor   -- output data of shape [n_batch, n_seq, units]
        initial_state, final_state
    """
    assert_colorize(len(xs.shape.as_list()) == 3,
                    f'Input shape error: expected a tensor of 3 dimensions, got {len(xs.shape.as_list())}')
    assert_colorize(masks is None or len(masks.shape.as_list()) == 2,
                    'Masks shape error: expected None or a tensor of 2 dimensions, '
                    f'got {None if masks is None else len(masks.shape.as_list())}')
    kernel_initializer = tf_utils.kaiming_initializer()

    def ln(x, gamma, beta, eps=1e-5, axes=[1]):
        """ layer normalization """
        mean, var = tf.nn.moments(x, axes=axes, keep_dims=True)
        x = (x - mean) / tf.sqrt(var + eps)
        x = gamma * x + beta
        return x

    n_batch, n_steps, x_dim = xs.shape.as_list()
    xw_shape = [x_dim, 4 * units]
    xb_shape = [4 * units]
    hw_shape = [units, 4 * units]
    hb_shape = [4 * units]

    with tf.variable_scope('lstm_norm'):
        x_w = tf.get_variable('x_w', shape=xw_shape,
                              initializer=kernel_initializer, regularizer=self.l2_regularizer)
        x_g = tf.get_variable('x_g', [4 * units], initializer=tf.constant_initializer(1.0))
        x_b = tf.get_variable('x_b', shape=xb_shape, initializer=tf.zeros_initializer())
        h_w = tf.get_variable('h_w', shape=hw_shape,
                              initializer=kernel_initializer, regularizer=self.l2_regularizer)
        h_g = tf.get_variable('h_g', [4 * units], initializer=tf.constant_initializer(1.0))
        h_b = tf.get_variable('h_b', shape=hb_shape, initializer=tf.zeros_initializer())
        b = tf.get_variable('b', [4 * units], initializer=tf.zeros_initializer())
        c_g = tf.get_variable('c_g', [units], initializer=tf.constant_initializer(1.0))
        c_b = tf.get_variable('c_b', [units], initializer=tf.zeros_initializer())

        initial_state = tf.zeros([n_batch, 2 * units], name='initial_state')
        initial_state = tf.split(value=initial_state, num_or_size_splits=2, axis=1)
        h, c = initial_state
        xs = tf.unstack(xs, n_steps, axis=1)
        if masks is not None:
            masks = tf.unstack(masks, n_steps, axis=1)
        ys = []
        for x in xs if masks is None else zip(xs, masks):
            if masks is not None:
                x, m = x
                m = m[..., None]
                # reset the state at episode boundaries
                c *= m
                h *= m
            z = ln(tf.matmul(x, x_w), x_g, x_b) + ln(tf.matmul(h, h_w), h_g, h_b) + b
            f, i, o, u = tf.split(value=z, num_or_size_splits=4, axis=1)
            f = tf.nn.sigmoid(f)
            i = tf.nn.sigmoid(i)
            o = tf.nn.sigmoid(o)
            u = tf.tanh(u)
            c = f * c + i * u
            h = o * tf.tanh(ln(c, c_g, c_b))
            ys.append(h)
        final_state = (h, c)
        ys = tf.stack(ys, 1)

    return ys, (initial_state, final_state)
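
# Usage sketch (illustrative; note the batch size must be static because the
# initial state is created with tf.zeros([n_batch, 2*units])):
#   xs = tf.placeholder(tf.float32, [8, 16, 32])   # [n_batch, n_seq, dim]
#   masks = tf.placeholder(tf.float32, [8, 16])    # 0 marks episode boundaries
#   ys, (init_state, final_state) = self.lstm_norm(xs, units=64, masks=masks)
#   # ys: [8, 16, 64]; the final h and c each have shape [8, 64]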
algorithm = list(cmd_args.algorithm)
processes = []
for algo in algorithm:
    arg_file = get_arg_file(algo)
    main = import_main(algo)
    render = cmd_args.render

    if cmd_args.checkpoint != '':
        args = load_args(arg_file)
        env_args = args['env']
        agent_args = args['agent']
        buffer_args = args['buffer'] if 'buffer' in args else {}
        checkpoint = cmd_args.checkpoint
        assert_colorize(os.path.exists(checkpoint), 'Model file does not exist')
        agent_args['model_root_dir'], agent_args['model_name'] = os.path.split(checkpoint)
        agent_args['log_root_dir'], _ = os.path.split(agent_args['model_root_dir'])
        agent_args['log_root_dir'] += '/logs'
        main(env_args, agent_args, buffer_args, render=render)
    else:
        prefix = cmd_args.prefix
        # Although random parameter search is in general better than grid search,
        # we stick with grid search here since it is easier to use for architecture search
        gs = GridSearch(arg_file, main, render=render, n_trials=cmd_args.trials,
                        dir_prefix=prefix, separate_process=len(algorithm) > 1)

        # Grid search happens here
        if algo == 'ppo':
def merge(self, local_buffer, length, start=0):
    """ Merge a local buffer into the replay buffer, useful for distributed algorithms """
    assert_colorize(length < self.capacity, 'Local buffer is too large')
    with self.locker:
        self._merge(local_buffer, length, start)
if __name__ == '__main__':
    cmd_args = parse_cmd_args()
    algorithm = cmd_args.algorithm
    arg_file = get_arg_file(algorithm)
    render = cmd_args.render == 'true'

    if cmd_args.file != '':
        args = load_args(arg_file)
        env_args = args['env']
        agent_args = args['agent']
        buffer_args = args['buffer'] if 'buffer' in args else {}
        model_file = cmd_args.file
        assert_colorize(os.path.exists(model_file), 'Model file does not exist')
        agent_args['model_root_dir'], agent_args['model_name'] = os.path.split(model_file)
        agent_args['log_root_dir'], _ = os.path.split(agent_args['model_root_dir'])
        agent_args['log_root_dir'] += '/logs'
        main(env_args, agent_args, buffer_args, render=render)
    else:
        prefix = cmd_args.prefix
        args = load_args(arg_file)
        env_args = args['env']
        agent_args = args['agent']
        buffer_args = args['buffer'] if 'buffer' in args else {}
        agent_args['algorithm'] = cmd_args.algorithm
        env_args['name'] = cmd_args.env
def _recursive_trial(self, arg, key, value, kwargs):
    assert_colorize(isinstance(value, list),
                    f'Expect value of type list, not {type(value)}: {value}')
    for v in value:
        arg[key] = v
        self._safe_call(f'-{key}={v}', lambda: self._change_args(**kwargs))
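
# Call sketch (hypothetical arguments; assumes GridSearch forwards its keyword
# lists into _change_args, which fixes one (key, value) pair per recursion level
# via _recursive_trial):
#   gs = GridSearch(arg_file, main, n_trials=2)
#   gs(learning_rate=[1e-3, 3e-4], gamma=[0.99, 0.995])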