def __init__(self, env, hidden_size, entcoeff=0.001, lr_rate=1e-3, scope="adversary"):
    self.scope = scope
    self.observation_shape = env.observation_space.shape
    self.actions_shape = env.action_space.shape
    self.input_shape = tuple([o + a for o, a in zip(self.observation_shape, self.actions_shape)])
    self.num_actions = env.action_space.shape[0]
    self.hidden_size = hidden_size
    self.build_ph()
    # Build graph
    generator_logits = self.build_graph(self.generator_obs_ph, self.generator_acs_ph, reuse=False)
    expert_logits = self.build_graph(self.expert_obs_ph, self.expert_acs_ph, reuse=True)
    # Build accuracy
    generator_acc = tf.reduce_mean(tf.to_float(tf.nn.sigmoid(generator_logits) < 0.5))
    expert_acc = tf.reduce_mean(tf.to_float(tf.nn.sigmoid(expert_logits) > 0.5))
    # Build regression loss
    # let x = logits, z = targets.
    # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
    generator_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=generator_logits, labels=tf.zeros_like(generator_logits))
    generator_loss = tf.reduce_mean(generator_loss)
    expert_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=expert_logits, labels=tf.ones_like(expert_logits))
    expert_loss = tf.reduce_mean(expert_loss)
    # Build entropy loss
    logits = tf.concat([generator_logits, expert_logits], 0)
    entropy = tf.reduce_mean(logit_bernoulli_entropy(logits))
    entropy_loss = -entcoeff * entropy
    # Loss + Accuracy terms
    self.losses = [generator_loss, expert_loss, entropy, entropy_loss,
                   generator_acc, expert_acc]
    self.loss_name = ["generator_loss", "expert_loss", "entropy", "entropy_loss",
                      "generator_acc", "expert_acc"]
    self.total_loss = generator_loss + expert_loss + entropy_loss
    # Build reward for policy
    self.reward_op = -tf.log(1 - tf.nn.sigmoid(generator_logits) + 1e-8)
    var_list = self.get_trainable_variables()
    self.lossandgrad = U.function(
        [self.generator_obs_ph, self.generator_acs_ph,
         self.expert_obs_ph, self.expert_acs_ph],
        self.losses + [U.flatgrad(self.total_loss, var_list)])
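
# --- Editor's hedged usage sketch (not in the original source) ---
# How the discriminator reward defined above might be queried during GAIL
# rollouts. The placeholder attributes (generator_obs_ph / generator_acs_ph,
# created by build_ph) and the `reward_giver` instance are assumptions
# inferred from the __init__ above.
def gail_reward(reward_giver, sess, obs_batch, acs_batch):
    # reward_op = -log(1 - D(s, a) + 1e-8): transitions the discriminator
    # scores as expert-like receive a large positive reward.
    return sess.run(reward_giver.reward_op,
                    feed_dict={reward_giver.generator_obs_ph: obs_batch,
                               reward_giver.generator_acs_ph: acs_batch})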
def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
    mean = fc(latent_vector, 'pi', self.size, init_scale=init_scale, init_bias=init_bias)
    logstd = tf.get_variable(name='pi/logstd', shape=[1, self.size],
                             initializer=tf.zeros_initializer())
    pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
    return self.pdfromflat(pdparam), mean
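
# --- Editor's hedged usage sketch (not in the original source) ---
# pdfromlatent builds a diagonal Gaussian whose mean comes from the latent
# features and whose log-std is a free variable; `mean * 0.0 + logstd` simply
# broadcasts the (1, size) log-std to the batch shape. The `pdtype` instance
# and downstream pd methods shown here are assumptions.
# pd, mean = pdtype.pdfromlatent(latent, init_scale=0.01)
# sampled_action = pd.sample()          # stochastic action
# neglogp = pd.neglogp(sampled_action)  # for the policy-gradient loss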
def __init__(self, inputs_tf, dimo, dimg, dimu, max_u, o_stats, g_stats, hidden, layers,
             **kwargs):
    """The actor-critic network and related training code.

    Args:
        inputs_tf (dict of tensors): all necessary inputs for the network: the
            observation (o), the goal (g), and the action (u)
        dimo (int): the dimension of the observations
        dimg (int): the dimension of the goals
        dimu (int): the dimension of the actions
        max_u (float): the maximum magnitude of actions; action outputs will be
            scaled accordingly
        o_stats (baselines.her.Normalizer): normalizer for observations
        g_stats (baselines.her.Normalizer): normalizer for goals
        hidden (int): number of hidden units that should be used in hidden layers
        layers (int): number of hidden layers
    """
    # Note: the body below reads max_u, o_stats, g_stats, hidden, layers, and
    # dimu from `self`; they are assumed to be bound from the constructor
    # arguments before this point, e.g. by a store_args-style decorator.
    self.o_tf = inputs_tf['o']
    self.g_tf = inputs_tf['g']
    self.u_tf = inputs_tf['u']

    # Prepare inputs for actor and critic.
    o = self.o_stats.normalize(self.o_tf)
    g = self.g_stats.normalize(self.g_tf)
    input_pi = tf.concat(axis=1, values=[o, g])  # for actor

    # Networks.
    with tf.variable_scope('pi'):
        self.pi_tf = self.max_u * tf.tanh(
            nn(input_pi, [self.hidden] * self.layers + [self.dimu]))
    with tf.variable_scope('Q'):
        # for policy training
        input_Q = tf.concat(axis=1, values=[o, g, self.pi_tf / self.max_u])
        self.Q_pi_tf = nn(input_Q, [self.hidden] * self.layers + [1])
        # for critic training
        input_Q = tf.concat(axis=1, values=[o, g, self.u_tf / self.max_u])
        self._input_Q = input_Q  # exposed for tests
        self.Q_tf = nn(input_Q, [self.hidden] * self.layers + [1], reuse=True)
def build_graph(self, obs_ph, acs_ph, reuse=False):
    with tf.variable_scope(self.scope):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        with tf.variable_scope("obfilter"):
            self.obs_rms = RunningMeanStd(shape=self.observation_shape)
        obs = (obs_ph - self.obs_rms.mean) / self.obs_rms.std
        # concatenate the two inputs -> form a transition
        _input = tf.concat([obs, acs_ph], axis=1)
        p_h1 = tf.contrib.layers.fully_connected(_input, self.hidden_size,
                                                 activation_fn=tf.nn.tanh)
        p_h2 = tf.contrib.layers.fully_connected(p_h1, self.hidden_size,
                                                 activation_fn=tf.nn.tanh)
        logits = tf.contrib.layers.fully_connected(p_h2, 1, activation_fn=tf.identity)
    return logits
def compute_gradients(self, loss, var_list, **kwargs):
    grads_and_vars = tf.train.AdamOptimizer.compute_gradients(self, loss, var_list, **kwargs)
    grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
    flat_grad = tf.concat([tf.reshape(g, (-1,)) for g, v in grads_and_vars], axis=0)
    shapes = [v.shape.as_list() for g, v in grads_and_vars]
    sizes = [int(np.prod(s)) for s in shapes]

    num_tasks = self.comm.Get_size()
    buf = np.zeros(sum(sizes), np.float32)

    def _collect_grads(flat_grad):
        self.comm.Allreduce(flat_grad, buf, op=MPI.SUM)
        np.divide(buf, float(num_tasks), out=buf)
        return buf

    avg_flat_grad = tf.py_func(_collect_grads, [flat_grad], tf.float32)
    avg_flat_grad.set_shape(flat_grad.shape)
    avg_grads = tf.split(avg_flat_grad, sizes, axis=0)
    avg_grads_and_vars = [(tf.reshape(g, v.shape), v)
                          for g, (_, v) in zip(avg_grads, grads_and_vars)]
    return avg_grads_and_vars
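
# --- Editor's hedged usage sketch (not in the original source) ---
# compute_gradients above Allreduce-averages the flattened gradient across MPI
# ranks, so applying the returned (grad, var) pairs performs an identical update
# on every worker. The (comm, learning_rate) constructor signature is an
# assumption consistent with the method above.
# from mpi4py import MPI
# optimizer = MpiAdamOptimizer(comm=MPI.COMM_WORLD, learning_rate=3e-4)
# train_op = optimizer.apply_gradients(optimizer.compute_gradients(loss, var_list))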
def getFactors(self, g, varlist):
    graph = tf.get_default_graph()
    factorTensors = {}
    fpropTensors = []
    bpropTensors = []
    opTypes = []
    fops = []

    def searchFactors(gradient, graph):
        # hard-coded search strategy
        bpropOp = gradient.op
        bpropOp_name = bpropOp.name

        bTensors = []
        fTensors = []
        # combining additive gradients; assume they are the same op type and
        # independent
        if 'AddN' in bpropOp_name:
            factors = []
            for g in gradient.op.inputs:
                factors.append(searchFactors(g, graph))
            op_names = [item['opName'] for item in factors]
            # TO-DO: need to check all the attributes of the ops as well
            print(gradient.name)
            print(op_names)
            print(len(np.unique(op_names)))
            assert len(np.unique(op_names)) == 1, \
                gradient.name + ' is shared among different computation OPs'

            bTensors = reduce(lambda x, y: x + y,
                              [item['bpropFactors'] for item in factors])
            if len(factors[0]['fpropFactors']) > 0:
                fTensors = reduce(lambda x, y: x + y,
                                  [item['fpropFactors'] for item in factors])
            fpropOp_name = op_names[0]
            fpropOp = factors[0]['op']
        else:
            fpropOp_name = re.search('gradientsSampled(_[0-9]+|)/(.+?)_grad',
                                     bpropOp_name).group(2)
            fpropOp = graph.get_operation_by_name(fpropOp_name)
            if fpropOp.op_def.name in KFAC_OPS:
                # Known OPs
                bTensor = [i for i in bpropOp.inputs
                           if 'gradientsSampled' in i.name][-1]
                bTensorShape = fpropOp.outputs[0].get_shape()
                if bTensor.get_shape()[0].value is None:
                    bTensor.set_shape(bTensorShape)
                bTensors.append(bTensor)

                if fpropOp.op_def.name == 'BiasAdd':
                    fTensors = []
                else:
                    fTensors.append([i for i in fpropOp.inputs
                                     if param.op.name not in i.name][0])
                fpropOp_name = fpropOp.op_def.name
            else:
                # unknown OPs, block approximation used
                bInputsList = [i for i in bpropOp.inputs[0].op.inputs
                               if 'gradientsSampled' in i.name
                               if 'Shape' not in i.name]
                if len(bInputsList) > 0:
                    bTensor = bInputsList[0]
                    bTensorShape = fpropOp.outputs[0].get_shape()
                    if len(bTensor.get_shape()) > 0 and \
                            bTensor.get_shape()[0].value is None:
                        bTensor.set_shape(bTensorShape)
                    bTensors.append(bTensor)
                # list.append returns None, so record the name before appending
                fpropOp_name = 'UNK-' + fpropOp.op_def.name
                opTypes.append(fpropOp_name)

        return {'opName': fpropOp_name, 'op': fpropOp,
                'fpropFactors': fTensors, 'bpropFactors': bTensors}

    for t, param in zip(g, varlist):
        if KFAC_DEBUG:
            print(('get factor for ' + param.name))
        factors = searchFactors(t, graph)
        factorTensors[param] = factors

    ########
    # check associated weights and bias for homogeneous coordinate representation
    # and check redundant factors
    # TO-DO: there may be a bug in detecting associated bias and weights for
    # forking layers, e.g. in inception models.
    for param in varlist:
        factorTensors[param]['assnWeights'] = None
        factorTensors[param]['assnBias'] = None
    for param in varlist:
        if factorTensors[param]['opName'] == 'BiasAdd':
            factorTensors[param]['assnWeights'] = None
            for item in varlist:
                if len(factorTensors[item]['bpropFactors']) > 0:
                    if (set(factorTensors[item]['bpropFactors']) ==
                            set(factorTensors[param]['bpropFactors'])) and \
                            (len(factorTensors[item]['fpropFactors']) > 0):
                        factorTensors[param]['assnWeights'] = item
                        factorTensors[item]['assnBias'] = param
                        factorTensors[param]['bpropFactors'] = \
                            factorTensors[item]['bpropFactors']
    ########

    ########
    # concatenate the additive gradients along the batch dimension,
    # i.e. assuming independence structure
    for key in ['fpropFactors', 'bpropFactors']:
        for i, param in enumerate(varlist):
            if len(factorTensors[param][key]) > 0:
                if (key + '_concat') not in factorTensors[param]:
                    name_scope = factorTensors[param][key][0].name.split(':')[0]
                    with tf.name_scope(name_scope):
                        factorTensors[param][key + '_concat'] = tf.concat(
                            factorTensors[param][key], 0)
            else:
                factorTensors[param][key + '_concat'] = None
            for j, param2 in enumerate(varlist[(i + 1):]):
                if (len(factorTensors[param][key]) > 0) and \
                        (set(factorTensors[param2][key]) ==
                         set(factorTensors[param][key])):
                    factorTensors[param2][key] = factorTensors[param][key]
                    factorTensors[param2][key + '_concat'] = \
                        factorTensors[param][key + '_concat']
    ########

    if KFAC_DEBUG:
        for items in zip(varlist, fpropTensors, bpropTensors, opTypes):
            print((items[0].name, factorTensors[items[0]]))
    self.factors = factorTensors
    return factorTensors
def getKfacPrecondUpdates(self, gradlist, varlist):
    updatelist = []
    vg = 0.

    assert len(self.stats) > 0
    assert len(self.stats_eigen) > 0
    assert len(self.factors) > 0
    counter = 0

    grad_dict = {var: grad for grad, var in zip(gradlist, varlist)}

    for grad, var in zip(gradlist, varlist):
        GRAD_RESHAPE = False
        GRAD_TRANSPOSE = False

        fpropFactoredFishers = self.stats[var]['fprop_concat_stats']
        bpropFactoredFishers = self.stats[var]['bprop_concat_stats']

        if (len(fpropFactoredFishers) + len(bpropFactoredFishers)) > 0:
            counter += 1
            GRAD_SHAPE = grad.get_shape()
            if len(grad.get_shape()) > 2:
                # reshape conv kernel parameters
                KW = int(grad.get_shape()[0])
                KH = int(grad.get_shape()[1])
                C = int(grad.get_shape()[2])
                D = int(grad.get_shape()[3])

                if len(fpropFactoredFishers) > 1 and self._channel_fac:
                    # reshape conv kernel parameters into tensor
                    grad = tf.reshape(grad, [KW * KH, C, D])
                else:
                    # reshape conv kernel parameters into 2D grad
                    grad = tf.reshape(grad, [-1, D])
                GRAD_RESHAPE = True
            elif len(grad.get_shape()) == 1:
                # reshape bias or 1D parameters
                D = int(grad.get_shape()[0])
                grad = tf.expand_dims(grad, 0)
                GRAD_RESHAPE = True
            else:
                # 2D parameters
                C = int(grad.get_shape()[0])
                D = int(grad.get_shape()[1])

            if (self.stats[var]['assnBias'] is not None) and not self._blockdiag_bias:
                # use homogeneous coordinates; only works for 2D grad.
                # TO-DO: figure out how to factorize bias grad
                # stack bias grad
                var_assnBias = self.stats[var]['assnBias']
                grad = tf.concat(
                    [grad, tf.expand_dims(grad_dict[var_assnBias], 0)], 0)

            # project gradient to eigen space and reshape the eigenvalues
            # for broadcasting
            eigVals = []
            for idx, stats in enumerate(self.stats[var]['fprop_concat_stats']):
                Q = self.stats_eigen[stats]['Q']
                e = detectMinVal(self.stats_eigen[stats]['e'], var,
                                 name='act', debug=KFAC_DEBUG)

                Q, e = factorReshape(Q, e, grad, facIndx=idx, ftype='act')
                eigVals.append(e)
                grad = gmatmul(Q, grad, transpose_a=True, reduce_dim=idx)

            for idx, stats in enumerate(self.stats[var]['bprop_concat_stats']):
                Q = self.stats_eigen[stats]['Q']
                e = detectMinVal(self.stats_eigen[stats]['e'], var,
                                 name='grad', debug=KFAC_DEBUG)

                Q, e = factorReshape(Q, e, grad, facIndx=idx, ftype='grad')
                eigVals.append(e)
                grad = gmatmul(grad, Q, transpose_b=False, reduce_dim=idx)

            #####
            # whiten using eigenvalues
            weightDecayCoeff = 0.
            if var in self._weight_decay_dict:
                weightDecayCoeff = self._weight_decay_dict[var]
                if KFAC_DEBUG:
                    print(('weight decay coeff for %s is %f'
                           % (var.name, weightDecayCoeff)))

            if self._factored_damping:
                if KFAC_DEBUG:
                    print(('use factored damping for %s' % (var.name)))
                coeffs = 1.
                num_factors = len(eigVals)
                # compute the ratio of the trace norms of the left and right
                # KFac matrices, and their generalization
                if len(eigVals) == 1:
                    damping = self._epsilon + weightDecayCoeff
                else:
                    damping = tf.pow(self._epsilon + weightDecayCoeff,
                                     1. / num_factors)
                eigVals_tnorm_avg = [tf.reduce_mean(tf.abs(e)) for e in eigVals]
                for e, e_tnorm in zip(eigVals, eigVals_tnorm_avg):
                    eig_tnorm_negList = [item for item in eigVals_tnorm_avg
                                         if item != e_tnorm]
                    if len(eigVals) == 1:
                        adjustment = 1.
                    elif len(eigVals) == 2:
                        adjustment = tf.sqrt(e_tnorm / eig_tnorm_negList[0])
                    else:
                        eig_tnorm_negList_prod = reduce(
                            lambda x, y: x * y, eig_tnorm_negList)
                        adjustment = tf.pow(
                            tf.pow(e_tnorm, num_factors - 1.) / eig_tnorm_negList_prod,
                            1. / num_factors)
                    coeffs *= (e + adjustment * damping)
            else:
                coeffs = 1.
                damping = (self._epsilon + weightDecayCoeff)
                for e in eigVals:
                    coeffs *= e
                coeffs += damping

            grad /= coeffs

            #####
            # project gradient back to euclidean space
            for idx, stats in enumerate(self.stats[var]['fprop_concat_stats']):
                Q = self.stats_eigen[stats]['Q']
                grad = gmatmul(Q, grad, transpose_a=False, reduce_dim=idx)

            for idx, stats in enumerate(self.stats[var]['bprop_concat_stats']):
                Q = self.stats_eigen[stats]['Q']
                grad = gmatmul(grad, Q, transpose_b=True, reduce_dim=idx)

            if (self.stats[var]['assnBias'] is not None) and not self._blockdiag_bias:
                # use homogeneous coordinates; only works for 2D grad.
                # TO-DO: figure out how to factorize bias grad
                # un-stack bias grad
                var_assnBias = self.stats[var]['assnBias']
                C_plus_one = int(grad.get_shape()[0])
                grad_assnBias = tf.reshape(
                    tf.slice(grad, begin=[C_plus_one - 1, 0], size=[1, -1]),
                    var_assnBias.get_shape())
                grad_assnWeights = tf.slice(grad, begin=[0, 0],
                                            size=[C_plus_one - 1, -1])
                grad_dict[var_assnBias] = grad_assnBias
                grad = grad_assnWeights

            if GRAD_RESHAPE:
                grad = tf.reshape(grad, GRAD_SHAPE)

            grad_dict[var] = grad

    print(('projecting %d gradient matrices' % counter))

    for g, var in zip(gradlist, varlist):
        grad = grad_dict[var]
        ### clipping ###
        if KFAC_DEBUG:
            print(('apply clipping to %s' % (var.name)))
            tf.Print(grad, [tf.sqrt(tf.reduce_sum(tf.pow(grad, 2)))],
                     "Euclidean norm of new grad")
        local_vg = tf.reduce_sum(grad * g * (self._lr * self._lr))
        vg += local_vg

    # rescale everything
    if KFAC_DEBUG:
        print('apply vFv clipping')

    scaling = tf.minimum(1., tf.sqrt(self._clip_kl / vg))
    if KFAC_DEBUG:
        scaling = tf.Print(scaling, [tf.convert_to_tensor('clip: '), scaling,
                                     tf.convert_to_tensor(' vFv: '), vg])
    with tf.control_dependencies([tf.assign(self.vFv, vg)]):
        updatelist = [grad_dict[var] for var in varlist]
        for i, item in enumerate(updatelist):
            updatelist[i] = scaling * item

    return updatelist
def compute_stats(self, loss_sampled, var_list=None):
    varlist = var_list
    if varlist is None:
        varlist = tf.trainable_variables()

    gs = tf.gradients(loss_sampled, varlist, name='gradientsSampled')
    self.gs = gs
    factors = self.getFactors(gs, varlist)
    stats = self.getStats(factors, varlist)

    updateOps = []
    statsUpdates = {}
    statsUpdates_cache = {}
    for var in varlist:
        opType = factors[var]['opName']
        fops = factors[var]['op']
        fpropFactor = factors[var]['fpropFactors_concat']
        fpropStats_vars = stats[var]['fprop_concat_stats']
        bpropFactor = factors[var]['bpropFactors_concat']
        bpropStats_vars = stats[var]['bprop_concat_stats']
        SVD_factors = {}
        for stats_var in fpropStats_vars:
            stats_var_dim = int(stats_var.get_shape()[0])
            if stats_var not in statsUpdates_cache:
                old_fpropFactor = fpropFactor
                B = (tf.shape(fpropFactor)[0])  # batch size
                if opType == 'Conv2D':
                    strides = fops.get_attr("strides")
                    padding = fops.get_attr("padding")
                    convkernel_size = var.get_shape()[0:3]

                    KH = int(convkernel_size[0])
                    KW = int(convkernel_size[1])
                    C = int(convkernel_size[2])
                    flatten_size = int(KH * KW * C)

                    Oh = int(bpropFactor.get_shape()[1])
                    Ow = int(bpropFactor.get_shape()[2])

                    if Oh == 1 and Ow == 1 and self._channel_fac:
                        # factorization along the channels
                        # assume independence among input channels
                        # factor = B x 1 x 1 x (KH x KW x C)
                        # patches = B x Oh x Ow x (KH x KW x C)
                        if len(SVD_factors) == 0:
                            if KFAC_DEBUG:
                                print(('approx %s act factor with rank-1 SVD factors'
                                       % (var.name)))
                            # find closest rank-1 approx to the feature map
                            S, U, V = tf.batch_svd(
                                tf.reshape(fpropFactor, [-1, KH * KW, C]))
                            # get rank-1 approx slides
                            sqrtS1 = tf.expand_dims(tf.sqrt(S[:, 0, 0]), 1)
                            patches_k = U[:, :, 0] * sqrtS1  # B x KH*KW
                            full_factor_shape = fpropFactor.get_shape()
                            patches_k.set_shape([full_factor_shape[0], KH * KW])
                            patches_c = V[:, :, 0] * sqrtS1  # B x C
                            patches_c.set_shape([full_factor_shape[0], C])
                            SVD_factors[C] = patches_c
                            SVD_factors[KH * KW] = patches_k
                        fpropFactor = SVD_factors[stats_var_dim]
                    else:
                        # poor-memory-usage implementation
                        patches = tf.extract_image_patches(
                            fpropFactor,
                            ksizes=[1, convkernel_size[0], convkernel_size[1], 1],
                            strides=strides,
                            rates=[1, 1, 1, 1],
                            padding=padding)

                        if self._approxT2:
                            if KFAC_DEBUG:
                                print(('approxT2 act fisher for %s' % (var.name)))
                            # T^2 terms * 1/T^2, size: B x C
                            fpropFactor = tf.reduce_mean(patches, [1, 2])
                        else:
                            # size: (B x Oh x Ow) x C
                            fpropFactor = tf.reshape(
                                patches, [-1, flatten_size]) / Oh / Ow
                fpropFactor_size = int(fpropFactor.get_shape()[-1])
                if stats_var_dim == (fpropFactor_size + 1) and not self._blockdiag_bias:
                    if opType == 'Conv2D' and not self._approxT2:
                        # correct padding for numerical stability (we
                        # divided out OhxOw from activations for T1 approx)
                        fpropFactor = tf.concat(
                            [fpropFactor,
                             tf.ones([tf.shape(fpropFactor)[0], 1]) / Oh / Ow], 1)
                    else:
                        # use homogeneous coordinates
                        fpropFactor = tf.concat(
                            [fpropFactor,
                             tf.ones([tf.shape(fpropFactor)[0], 1])], 1)

                # average over the number of data points in a batch
                # divided by B
                cov = tf.matmul(fpropFactor, fpropFactor,
                                transpose_a=True) / tf.cast(B, tf.float32)
                updateOps.append(cov)
                statsUpdates[stats_var] = cov
                if opType != 'Conv2D':
                    # HACK: for convolution we recompute fprop stats for
                    # every layer including forking layers
                    statsUpdates_cache[stats_var] = cov

        for stats_var in bpropStats_vars:
            stats_var_dim = int(stats_var.get_shape()[0])
            if stats_var not in statsUpdates_cache:
                old_bpropFactor = bpropFactor
                bpropFactor_shape = bpropFactor.get_shape()
                B = tf.shape(bpropFactor)[0]  # batch size
                C = int(bpropFactor_shape[-1])  # num channels
                if opType == 'Conv2D' or len(bpropFactor_shape) == 4:
                    if fpropFactor is not None:
                        if self._approxT2:
                            if KFAC_DEBUG:
                                print(('approxT2 grad fisher for %s' % (var.name)))
                            bpropFactor = tf.reduce_sum(
                                bpropFactor, [1, 2])  # T^2 terms * 1/T^2
                        else:
                            bpropFactor = tf.reshape(
                                bpropFactor, [-1, C]) * Oh * Ow  # T * 1/T terms
                    else:
                        # just doing block diag approx. spatially independent
                        # structure does not apply here. summing over
                        # spatial locations
                        if KFAC_DEBUG:
                            print(('block diag approx fisher for %s' % (var.name)))
                        bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])

                # assume sampled loss is averaged. TO-DO: figure out a better
                # way to handle this
                bpropFactor *= tf.to_float(B)
                ##

                cov_b = tf.matmul(
                    bpropFactor, bpropFactor,
                    transpose_a=True) / tf.to_float(tf.shape(bpropFactor)[0])

                updateOps.append(cov_b)
                statsUpdates[stats_var] = cov_b
                statsUpdates_cache[stats_var] = cov_b

    if KFAC_DEBUG:
        aKey = list(statsUpdates.keys())[0]
        statsUpdates[aKey] = tf.Print(statsUpdates[aKey], [
            tf.convert_to_tensor('step:'),
            self.global_step,
            tf.convert_to_tensor('computing stats'),
        ])
    self.statsUpdates = statsUpdates
    return statsUpdates
def multi_modal_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example network in tf that has both state and image inputs.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 2
    layer_size = 20
    dim_hidden = (n_layers - 1) * [layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i + dim))
        else:
            x_idx = x_idx + list(range(i, i + dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1] + 1]
    image_input = nn_input[:, x_idx[-1] + 1:img_idx[-1] + 1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, im_width, im_height, num_channels])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width / (2.0 * pool_size) * im_height / (2.0 * pool_size)
                        * num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    weights = {
        # 3x3 conv, num_channels inputs, num_filters[0] outputs
        'wc1': get_xavier_weights([filter_size, filter_size, num_channels, num_filters[0]],
                                  (pool_size, pool_size)),
        # 3x3 conv, num_filters[0] inputs, num_filters[1] outputs
        'wc2': get_xavier_weights([filter_size, filter_size, num_filters[0], num_filters[1]],
                                  (pool_size, pool_size)),
    }

    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])
    conv_layer_0 = max_pool(conv_layer_0, k=pool_size)
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_1 = max_pool(conv_layer_1, k=pool_size)

    conv_out_flat = tf.reshape(conv_layer_1, [-1, conv_out_size])

    fc_input = tf.concat(axis=1, values=[conv_out_flat, state_input])

    fc_output, _, _ = get_mlp_layers(fc_input, n_layers, dim_hidden)

    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])
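
# --- Editor's hedged usage sketch (not in the original source) ---
# The network_config dictionary that multi_modal_network expects, inferred
# from the keys it reads above. The sensor names and sizes here are
# illustrative assumptions, not values from the original source.
example_network_config = {
    'obs_include': ['joint_angles', 'rgb_image'],  # ordering within the observation
    'obs_image_data': ['rgb_image'],               # which sensors are image data
    'sensor_dims': {'joint_angles': 7,
                    'rgb_image': 32 * 32 * 3},     # flattened sizes per sensor
    'num_filters': [16, 32],
    'image_height': 32,
    'image_width': 32,
    'image_channels': 3,
}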
def multi_modal_network_fp(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example network in tf that has both state and image inputs, with the
    feature point architecture (spatial softmax + expectation).

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1) * [layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 5

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i + dim))
        else:
            x_idx = x_idx + list(range(i, i + dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1] + 1]
    image_input = nn_input[:, x_idx[-1] + 1:img_idx[-1] + 1]

    # image goes through 3 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    image_input = tf.transpose(image_input, perm=[0, 3, 2, 1])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width / (2.0 * pool_size) * im_height / (2.0 * pool_size)
                        * num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    with tf.variable_scope('conv_params'):
        weights = {
            # 5x5 conv, num_channels inputs, num_filters[0] outputs
            'wc1': init_weights([filter_size, filter_size, num_channels, num_filters[0]],
                                name='wc1'),
            # 5x5 conv, num_filters[0] inputs, num_filters[1] outputs
            'wc2': init_weights([filter_size, filter_size, num_filters[0], num_filters[1]],
                                name='wc2'),
            # 5x5 conv, num_filters[1] inputs, num_filters[2] outputs
            'wc3': init_weights([filter_size, filter_size, num_filters[1], num_filters[2]],
                                name='wc3'),
        }

        biases = {
            'bc1': init_bias([num_filters[0]], name='bc1'),
            'bc2': init_bias([num_filters[1]], name='bc2'),
            'bc3': init_bias([num_filters[2]], name='bc3'),
        }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'],
                          strides=[1, 2, 2, 1])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_2 = conv2d(img=conv_layer_1, w=weights['wc3'], b=biases['bc3'])

    _, num_rows, num_cols, num_fp = conv_layer_2.get_shape()
    num_rows, num_cols, num_fp = [int(x) for x in [num_rows, num_cols, num_fp]]
    x_map = np.empty([num_rows, num_cols], np.float32)
    y_map = np.empty([num_rows, num_cols], np.float32)

    for i in range(num_rows):
        for j in range(num_cols):
            x_map[i, j] = (i - num_rows / 2.0) / num_rows
            y_map[i, j] = (j - num_cols / 2.0) / num_cols

    x_map = tf.convert_to_tensor(x_map)
    y_map = tf.convert_to_tensor(y_map)

    x_map = tf.reshape(x_map, [num_rows * num_cols])
    y_map = tf.reshape(y_map, [num_rows * num_cols])

    # rearrange features to be [batch_size, num_fp, num_rows, num_cols]
    features = tf.reshape(tf.transpose(conv_layer_2, [0, 3, 1, 2]),
                          [-1, num_rows * num_cols])
    softmax = tf.nn.softmax(features)

    fp_x = tf.reduce_sum(tf.multiply(x_map, softmax), [1], keep_dims=True)
    fp_y = tf.reduce_sum(tf.multiply(y_map, softmax), [1], keep_dims=True)

    fp = tf.reshape(tf.concat(axis=1, values=[fp_x, fp_y]), [-1, num_fp * 2])

    fc_input = tf.concat(axis=1, values=[fp, state_input])

    fc_output, weights_FC, biases_FC = get_mlp_layers(fc_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC
    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    nnet = TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss], fp=fp)
    last_conv_vars = fc_input

    return nnet, fc_vars, last_conv_vars
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
    assert isinstance(ob_space, gym.spaces.Box)

    self.pdtype = pdtype = make_pdtype(ac_space)
    sequence_length = None

    ob = U.get_placeholder(name="ob", dtype=tf.float32,
                           shape=[sequence_length] + list(ob_space.shape))

    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob_space.shape)

    obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
    last_out = obz
    for i in range(num_hid_layers):
        last_out = tf.nn.tanh(
            dense(last_out, hid_size, "vffc%i" % (i + 1),
                  weight_init=U.normc_initializer(1.0)))
    self.vpred = dense(last_out, 1, "vffinal",
                       weight_init=U.normc_initializer(1.0))[:, 0]

    last_out = obz
    for i in range(num_hid_layers):
        last_out = tf.nn.tanh(
            dense(last_out, hid_size, "polfc%i" % (i + 1),
                  weight_init=U.normc_initializer(1.0)))

    if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
        mean = dense(last_out, pdtype.param_shape()[0] // 2, "polfinal",
                     U.normc_initializer(0.01))
        logstd = tf.get_variable(name="logstd",
                                 shape=[1, pdtype.param_shape()[0] // 2],
                                 initializer=tf.zeros_initializer())
        pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
    else:
        pdparam = dense(last_out, pdtype.param_shape()[0], "polfinal",
                        U.normc_initializer(0.01))

    self.pd = pdtype.pdfromflat(pdparam)

    self.state_in = []
    self.state_out = []

    # change for BC
    stochastic = U.get_placeholder(name="stochastic", dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self.ac = ac
    self._act = U.function([stochastic, ob], [ac, self.vpred])
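
# --- Editor's hedged usage sketch (not in the original source) ---
# Stepping the policy built by _init. `pi` standing for a constructed policy
# instance is an assumption; stochastic=True samples from the Gaussian,
# stochastic=False returns its mode.
def policy_step(pi, ob, stochastic=True):
    # _act = U.function([stochastic, ob], [ac, vpred]), as defined above
    ac, vpred = pi._act(stochastic, ob[None])  # add a batch dimension of one
    return ac[0], vpred[0]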
def flatten_grads(var_list, grads):
    """Flattens variables and their gradients into a single vector."""
    return tf.concat(
        [tf.reshape(grad, [U.numel(v)]) for (v, grad) in zip(var_list, grads)], 0)
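
# --- Editor's hedged worked example (not in the original source) ---
# Flattening the gradients of a (3, 2) weight and a (2,) bias yields one
# length-8 vector; the per-variable shapes are only needed again when the
# flat update is unpacked (e.g. after averaging it across workers).
# W = tf.Variable(tf.zeros([3, 2]))
# b = tf.Variable(tf.zeros([2]))
# flat = flatten_grads([W, b], tf.gradients(loss, [W, b]))  # shape: (8,)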
def __call__(self, obs, action, reuse=False):
    # Note: the `reuse` argument is effectively ignored; tf.AUTO_REUSE below
    # shares variables across repeated calls automatically.
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        # this assumes observation and action can be concatenated
        x = tf.concat([obs, action], axis=-1)
        x = self.network_builder(x)
        x = tf.layers.dense(
            x, 1,
            kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3))
    return x
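
# --- Editor's hedged usage sketch (not in the original source) ---
# Wiring the critic for DDPG-style training. Because __call__ uses
# tf.AUTO_REUSE, both calls below share a single set of weights. The `critic`
# and `actor` callables are assumptions consistent with the method above.
# q_tf = critic(obs_ph, actions_ph)        # Q(s, a) on replay-buffer actions
# q_pi_tf = critic(obs_ph, actor(obs_ph))  # Q(s, pi(s)) for the actor loss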