示例#1
0
 def __init__(self,
              env,
              hidden_size,
              entcoeff=0.001,
              lr_rate=1e-3,
              scope="adversary"):
     self.scope = scope
     self.observation_shape = env.observation_space.shape
     self.actions_shape = env.action_space.shape
     self.input_shape = tuple([
         o + a for o, a in zip(self.observation_shape, self.actions_shape)
     ])
     self.num_actions = env.action_space.shape[0]
     self.hidden_size = hidden_size
     self.build_ph()
     # Build grpah
     generator_logits = self.build_graph(self.generator_obs_ph,
                                         self.generator_acs_ph,
                                         reuse=False)
     expert_logits = self.build_graph(self.expert_obs_ph,
                                      self.expert_acs_ph,
                                      reuse=True)
     # Build accuracy
     generator_acc = tf.reduce_mean(
         tf.to_float(tf.nn.sigmoid(generator_logits) < 0.5))
     expert_acc = tf.reduce_mean(
         tf.to_float(tf.nn.sigmoid(expert_logits) > 0.5))
     # Build regression loss
     # let x = logits, z = targets.
     # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
     generator_loss = tf.nn.sigmoid_cross_entropy_with_logits(
         logits=generator_logits, labels=tf.zeros_like(generator_logits))
     generator_loss = tf.reduce_mean(generator_loss)
     expert_loss = tf.nn.sigmoid_cross_entropy_with_logits(
         logits=expert_logits, labels=tf.ones_like(expert_logits))
     expert_loss = tf.reduce_mean(expert_loss)
     # Build entropy loss
     logits = tf.concat([generator_logits, expert_logits], 0)
     entropy = tf.reduce_mean(logit_bernoulli_entropy(logits))
     entropy_loss = -entcoeff * entropy
     # Loss + Accuracy terms
     self.losses = [
         generator_loss, expert_loss, entropy, entropy_loss, generator_acc,
         expert_acc
     ]
     self.loss_name = [
         "generator_loss", "expert_loss", "entropy", "entropy_loss",
         "generator_acc", "expert_acc"
     ]
     self.total_loss = generator_loss + expert_loss + entropy_loss
     # Build Reward for policy
     self.reward_op = -tf.log(1 - tf.nn.sigmoid(generator_logits) + 1e-8)
     var_list = self.get_trainable_variables()
     self.lossandgrad = U.function([
         self.generator_obs_ph, self.generator_acs_ph, self.expert_obs_ph,
         self.expert_acs_ph
     ], self.losses + [U.flatgrad(self.total_loss, var_list)])
示例#2
0
 def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
     mean = fc(latent_vector,
               'pi',
               self.size,
               init_scale=init_scale,
               init_bias=init_bias)
     logstd = tf.get_variable(name='pi/logstd',
                              shape=[1, self.size],
                              initializer=tf.zeros_initializer())
     pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
     return self.pdfromflat(pdparam), mean
    def __init__(self, inputs_tf, dimo, dimg, dimu, max_u, o_stats, g_stats,
                 hidden, layers, **kwargs):
        """The actor-critic network and related training pytorch.

        Args:
            inputs_tf (dict of tensors): all necessary inputs for the network: the
                observation (o), the goal (g), and the action (u)
            dimo (int): the dimension of the observations
            dimg (int): the dimension of the goals
            dimu (int): the dimension of the actions
            max_u (float): the maximum magnitude of actions; action outputs will be scaled
                accordingly
            o_stats (baselines.her.Normalizer): normalizer for observations
            g_stats (baselines.her.Normalizer): normalizer for goals
            hidden (int): number of hidden units that should be used in hidden layers
            layers (int): number of hidden layers
        """
        self.o_tf = inputs_tf['o']
        self.g_tf = inputs_tf['g']
        self.u_tf = inputs_tf['u']

        # Prepare inputs for actor and critic.
        o = self.o_stats.normalize(self.o_tf)
        g = self.g_stats.normalize(self.g_tf)
        input_pi = tf.concat(axis=1, values=[o, g])  # for actor

        # Networks.
        with tf.variable_scope('pi'):
            self.pi_tf = self.max_u * tf.tanh(
                nn(input_pi, [self.hidden] * self.layers + [self.dimu]))
        with tf.variable_scope('Q'):
            # for policy training
            input_Q = tf.concat(axis=1, values=[o, g, self.pi_tf / self.max_u])
            self.Q_pi_tf = nn(input_Q, [self.hidden] * self.layers + [1])
            # for critic training
            input_Q = tf.concat(axis=1, values=[o, g, self.u_tf / self.max_u])
            self._input_Q = input_Q  # exposed for tests
            self.Q_tf = nn(input_Q, [self.hidden] * self.layers + [1],
                           reuse=True)
示例#4
0
    def build_graph(self, obs_ph, acs_ph, reuse=False):
        with tf.variable_scope(self.scope):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("obfilter"):
                self.obs_rms = RunningMeanStd(shape=self.observation_shape)
            obs = (obs_ph - self.obs_rms.mean / self.obs_rms.std)
            _input = tf.concat(
                [obs, acs_ph],
                axis=1)  # concatenate the two input -> form a transition
            p_h1 = tf.contrib.layers.fully_connected(_input,
                                                     self.hidden_size,
                                                     activation_fn=tf.nn.tanh)
            p_h2 = tf.contrib.layers.fully_connected(p_h1,
                                                     self.hidden_size,
                                                     activation_fn=tf.nn.tanh)
            logits = tf.contrib.layers.fully_connected(
                p_h2, 1, activation_fn=tf.identity)
        return logits
    def compute_gradients(self, loss, var_list, **kwargs):
        grads_and_vars = tf.train.AdamOptimizer.compute_gradients(
            self, loss, var_list, **kwargs)
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        flat_grad = tf.concat(
            [tf.reshape(g, (-1, )) for g, v in grads_and_vars], axis=0)
        shapes = [v.shape.as_list() for g, v in grads_and_vars]
        sizes = [int(np.prod(s)) for s in shapes]

        num_tasks = self.comm.Get_size()
        buf = np.zeros(sum(sizes), np.float32)

        def _collect_grads(flat_grad):
            self.comm.Allreduce(flat_grad, buf, op=MPI.SUM)
            np.divide(buf, float(num_tasks), out=buf)
            return buf

        avg_flat_grad = tf.py_func(_collect_grads, [flat_grad], tf.float32)
        avg_flat_grad.set_shape(flat_grad.shape)
        avg_grads = tf.split(avg_flat_grad, sizes, axis=0)
        avg_grads_and_vars = [(tf.reshape(g, v.shape), v)
                              for g, (_, v) in zip(avg_grads, grads_and_vars)]

        return avg_grads_and_vars
示例#6
0
    def getFactors(self, g, varlist):
        graph = tf.get_default_graph()
        factorTensors = {}
        fpropTensors = []
        bpropTensors = []
        opTypes = []
        fops = []

        def searchFactors(gradient, graph):
            # hard coded search stratergy
            bpropOp = gradient.op
            bpropOp_name = bpropOp.name

            bTensors = []
            fTensors = []

            # combining additive gradient, assume they are the same op type and
            # indepedent
            if 'AddN' in bpropOp_name:
                factors = []
                for g in gradient.op.inputs:
                    factors.append(searchFactors(g, graph))
                op_names = [item['opName'] for item in factors]
                # TO-DO: need to check all the attribute of the ops as well
                print(gradient.name)
                print(op_names)
                print(len(np.unique(op_names)))
                assert len(np.unique(op_names)) == 1, gradient.name + \
                    ' is shared among different computation OPs'

                bTensors = reduce(lambda x, y: x + y,
                                  [item['bpropFactors'] for item in factors])
                if len(factors[0]['fpropFactors']) > 0:
                    fTensors = reduce(
                        lambda x, y: x + y,
                        [item['fpropFactors'] for item in factors])
                fpropOp_name = op_names[0]
                fpropOp = factors[0]['op']
            else:
                fpropOp_name = re.search(
                    'gradientsSampled(_[0-9]+|)/(.+?)_grad',
                    bpropOp_name).group(2)
                fpropOp = graph.get_operation_by_name(fpropOp_name)
                if fpropOp.op_def.name in KFAC_OPS:
                    # Known OPs
                    ###
                    bTensor = [
                        i for i in bpropOp.inputs
                        if 'gradientsSampled' in i.name
                    ][-1]
                    bTensorShape = fpropOp.outputs[0].get_shape()
                    if bTensor.get_shape()[0].value == None:
                        bTensor.set_shape(bTensorShape)
                    bTensors.append(bTensor)
                    ###
                    if fpropOp.op_def.name == 'BiasAdd':
                        fTensors = []
                    else:
                        fTensors.append([
                            i for i in fpropOp.inputs
                            if param.op.name not in i.name
                        ][0])
                    fpropOp_name = fpropOp.op_def.name
                else:
                    # unknown OPs, block approximation used
                    bInputsList = [
                        i for i in bpropOp.inputs[0].op.inputs
                        if 'gradientsSampled' in i.name
                        if 'Shape' not in i.name
                    ]
                    if len(bInputsList) > 0:
                        bTensor = bInputsList[0]
                        bTensorShape = fpropOp.outputs[0].get_shape()
                        if len(bTensor.get_shape()) > 0 and bTensor.get_shape(
                        )[0].value == None:
                            bTensor.set_shape(bTensorShape)
                        bTensors.append(bTensor)
                    fpropOp_name = opTypes.append('UNK-' + fpropOp.op_def.name)

            return {
                'opName': fpropOp_name,
                'op': fpropOp,
                'fpropFactors': fTensors,
                'bpropFactors': bTensors
            }

        for t, param in zip(g, varlist):
            if KFAC_DEBUG:
                print(('get factor for ' + param.name))
            factors = searchFactors(t, graph)
            factorTensors[param] = factors

        ########
        # check associated weights and bias for homogeneous coordinate representation
        # and check redundent factors
        # TO-DO: there may be a bug to detect associate bias and weights for
        # forking layer, e.g. in inception models.
        for param in varlist:
            factorTensors[param]['assnWeights'] = None
            factorTensors[param]['assnBias'] = None
        for param in varlist:
            if factorTensors[param]['opName'] == 'BiasAdd':
                factorTensors[param]['assnWeights'] = None
                for item in varlist:
                    if len(factorTensors[item]['bpropFactors']) > 0:
                        if (set(factorTensors[item]['bpropFactors']) == set(
                                factorTensors[param]['bpropFactors']
                        )) and (len(factorTensors[item]['fpropFactors']) > 0):
                            factorTensors[param]['assnWeights'] = item
                            factorTensors[item]['assnBias'] = param
                            factorTensors[param][
                                'bpropFactors'] = factorTensors[item][
                                    'bpropFactors']

        ########

        ########
        # concatenate the additive gradients along the batch dimension, i.e.
        # assuming independence structure
        for key in ['fpropFactors', 'bpropFactors']:
            for i, param in enumerate(varlist):
                if len(factorTensors[param][key]) > 0:
                    if (key + '_concat') not in factorTensors[param]:
                        name_scope = factorTensors[param][key][0].name.split(
                            ':')[0]
                        with tf.name_scope(name_scope):
                            factorTensors[param][key + '_concat'] = tf.concat(
                                factorTensors[param][key], 0)
                else:
                    factorTensors[param][key + '_concat'] = None
                for j, param2 in enumerate(varlist[(i + 1):]):
                    if (len(factorTensors[param][key]) > 0) and (set(
                            factorTensors[param2][key]) == set(
                                factorTensors[param][key])):
                        factorTensors[param2][key] = factorTensors[param][key]
                        factorTensors[param2][
                            key + '_concat'] = factorTensors[param][key +
                                                                    '_concat']
        ########

        if KFAC_DEBUG:
            for items in zip(varlist, fpropTensors, bpropTensors, opTypes):
                print((items[0].name, factorTensors[item]))
        self.factors = factorTensors
        return factorTensors
示例#7
0
    def getKfacPrecondUpdates(self, gradlist, varlist):
        updatelist = []
        vg = 0.

        assert len(self.stats) > 0
        assert len(self.stats_eigen) > 0
        assert len(self.factors) > 0
        counter = 0

        grad_dict = {var: grad for grad, var in zip(gradlist, varlist)}

        for grad, var in zip(gradlist, varlist):
            GRAD_RESHAPE = False
            GRAD_TRANSPOSE = False

            fpropFactoredFishers = self.stats[var]['fprop_concat_stats']
            bpropFactoredFishers = self.stats[var]['bprop_concat_stats']

            if (len(fpropFactoredFishers) + len(bpropFactoredFishers)) > 0:
                counter += 1
                GRAD_SHAPE = grad.get_shape()
                if len(grad.get_shape()) > 2:
                    # reshape conv kernel parameters
                    KW = int(grad.get_shape()[0])
                    KH = int(grad.get_shape()[1])
                    C = int(grad.get_shape()[2])
                    D = int(grad.get_shape()[3])

                    if len(fpropFactoredFishers) > 1 and self._channel_fac:
                        # reshape conv kernel parameters into tensor
                        grad = tf.reshape(grad, [KW * KH, C, D])
                    else:
                        # reshape conv kernel parameters into 2D grad
                        grad = tf.reshape(grad, [-1, D])
                    GRAD_RESHAPE = True
                elif len(grad.get_shape()) == 1:
                    # reshape bias or 1D parameters
                    D = int(grad.get_shape()[0])

                    grad = tf.expand_dims(grad, 0)
                    GRAD_RESHAPE = True
                else:
                    # 2D parameters
                    C = int(grad.get_shape()[0])
                    D = int(grad.get_shape()[1])

                if (self.stats[var]['assnBias']
                        is not None) and not self._blockdiag_bias:
                    # use homogeneous coordinates only works for 2D grad.
                    # TO-DO: figure out how to factorize bias grad
                    # stack bias grad
                    var_assnBias = self.stats[var]['assnBias']
                    grad = tf.concat(
                        [grad,
                         tf.expand_dims(grad_dict[var_assnBias], 0)], 0)

                # project gradient to eigen space and reshape the eigenvalues
                # for broadcasting
                eigVals = []

                for idx, stats in enumerate(
                        self.stats[var]['fprop_concat_stats']):
                    Q = self.stats_eigen[stats]['Q']
                    e = detectMinVal(self.stats_eigen[stats]['e'],
                                     var,
                                     name='act',
                                     debug=KFAC_DEBUG)

                    Q, e = factorReshape(Q, e, grad, facIndx=idx, ftype='act')
                    eigVals.append(e)
                    grad = gmatmul(Q, grad, transpose_a=True, reduce_dim=idx)

                for idx, stats in enumerate(
                        self.stats[var]['bprop_concat_stats']):
                    Q = self.stats_eigen[stats]['Q']
                    e = detectMinVal(self.stats_eigen[stats]['e'],
                                     var,
                                     name='grad',
                                     debug=KFAC_DEBUG)

                    Q, e = factorReshape(Q, e, grad, facIndx=idx, ftype='grad')
                    eigVals.append(e)
                    grad = gmatmul(grad, Q, transpose_b=False, reduce_dim=idx)
                ##

                #####
                # whiten using eigenvalues
                weightDecayCoeff = 0.
                if var in self._weight_decay_dict:
                    weightDecayCoeff = self._weight_decay_dict[var]
                    if KFAC_DEBUG:
                        print(('weight decay coeff for %s is %f' %
                               (var.name, weightDecayCoeff)))

                if self._factored_damping:
                    if KFAC_DEBUG:
                        print(('use factored damping for %s' % (var.name)))
                    coeffs = 1.
                    num_factors = len(eigVals)
                    # compute the ratio of two trace norm of the left and right
                    # KFac matrices, and their generalization
                    if len(eigVals) == 1:
                        damping = self._epsilon + weightDecayCoeff
                    else:
                        damping = tf.pow(self._epsilon + weightDecayCoeff,
                                         1. / num_factors)
                    eigVals_tnorm_avg = [
                        tf.reduce_mean(tf.abs(e)) for e in eigVals
                    ]
                    for e, e_tnorm in zip(eigVals, eigVals_tnorm_avg):
                        eig_tnorm_negList = [
                            item for item in eigVals_tnorm_avg
                            if item != e_tnorm
                        ]
                        if len(eigVals) == 1:
                            adjustment = 1.
                        elif len(eigVals) == 2:
                            adjustment = tf.sqrt(e_tnorm /
                                                 eig_tnorm_negList[0])
                        else:
                            eig_tnorm_negList_prod = reduce(
                                lambda x, y: x * y, eig_tnorm_negList)
                            adjustment = tf.pow(
                                tf.pow(e_tnorm, num_factors - 1.) /
                                eig_tnorm_negList_prod, 1. / num_factors)
                        coeffs *= (e + adjustment * damping)
                else:
                    coeffs = 1.
                    damping = (self._epsilon + weightDecayCoeff)
                    for e in eigVals:
                        coeffs *= e
                    coeffs += damping

                #grad = tf.Print(grad, [tf.convert_to_tensor('1'), tf.convert_to_tensor(var.name), grad.get_shape()])

                grad /= coeffs

                #grad = tf.Print(grad, [tf.convert_to_tensor('2'), tf.convert_to_tensor(var.name), grad.get_shape()])
                #####
                # project gradient back to euclidean space
                for idx, stats in enumerate(
                        self.stats[var]['fprop_concat_stats']):
                    Q = self.stats_eigen[stats]['Q']
                    grad = gmatmul(Q, grad, transpose_a=False, reduce_dim=idx)

                for idx, stats in enumerate(
                        self.stats[var]['bprop_concat_stats']):
                    Q = self.stats_eigen[stats]['Q']
                    grad = gmatmul(grad, Q, transpose_b=True, reduce_dim=idx)
                ##

                #grad = tf.Print(grad, [tf.convert_to_tensor('3'), tf.convert_to_tensor(var.name), grad.get_shape()])
                if (self.stats[var]['assnBias']
                        is not None) and not self._blockdiag_bias:
                    # use homogeneous coordinates only works for 2D grad.
                    # TO-DO: figure out how to factorize bias grad
                    # un-stack bias grad
                    var_assnBias = self.stats[var]['assnBias']
                    C_plus_one = int(grad.get_shape()[0])
                    grad_assnBias = tf.reshape(
                        tf.slice(grad, begin=[C_plus_one - 1, 0], size=[1,
                                                                        -1]),
                        var_assnBias.get_shape())
                    grad_assnWeights = tf.slice(grad,
                                                begin=[0, 0],
                                                size=[C_plus_one - 1, -1])
                    grad_dict[var_assnBias] = grad_assnBias
                    grad = grad_assnWeights

                #grad = tf.Print(grad, [tf.convert_to_tensor('4'), tf.convert_to_tensor(var.name), grad.get_shape()])
                if GRAD_RESHAPE:
                    grad = tf.reshape(grad, GRAD_SHAPE)

                grad_dict[var] = grad

        print(('projecting %d gradient matrices' % counter))

        for g, var in zip(gradlist, varlist):
            grad = grad_dict[var]
            ### clipping ###
            if KFAC_DEBUG:
                print(('apply clipping to %s' % (var.name)))
            tf.Print(grad, [tf.sqrt(tf.reduce_sum(tf.pow(grad, 2)))],
                     "Euclidean norm of new grad")
            local_vg = tf.reduce_sum(grad * g * (self._lr * self._lr))
            vg += local_vg

        # recale everything
        if KFAC_DEBUG:
            print('apply vFv clipping')

        scaling = tf.minimum(1., tf.sqrt(self._clip_kl / vg))
        if KFAC_DEBUG:
            scaling = tf.Print(scaling, [
                tf.convert_to_tensor('clip: '), scaling,
                tf.convert_to_tensor(' vFv: '), vg
            ])
        with tf.control_dependencies([tf.assign(self.vFv, vg)]):
            updatelist = [grad_dict[var] for var in varlist]
            for i, item in enumerate(updatelist):
                updatelist[i] = scaling * item

        return updatelist
示例#8
0
    def compute_stats(self, loss_sampled, var_list=None):
        varlist = var_list
        if varlist is None:
            varlist = tf.trainable_variables()

        gs = tf.gradients(loss_sampled, varlist, name='gradientsSampled')
        self.gs = gs
        factors = self.getFactors(gs, varlist)
        stats = self.getStats(factors, varlist)

        updateOps = []
        statsUpdates = {}
        statsUpdates_cache = {}
        for var in varlist:
            opType = factors[var]['opName']
            fops = factors[var]['op']
            fpropFactor = factors[var]['fpropFactors_concat']
            fpropStats_vars = stats[var]['fprop_concat_stats']
            bpropFactor = factors[var]['bpropFactors_concat']
            bpropStats_vars = stats[var]['bprop_concat_stats']
            SVD_factors = {}
            for stats_var in fpropStats_vars:
                stats_var_dim = int(stats_var.get_shape()[0])
                if stats_var not in statsUpdates_cache:
                    old_fpropFactor = fpropFactor
                    B = (tf.shape(fpropFactor)[0])  # batch size
                    if opType == 'Conv2D':
                        strides = fops.get_attr("strides")
                        padding = fops.get_attr("padding")
                        convkernel_size = var.get_shape()[0:3]

                        KH = int(convkernel_size[0])
                        KW = int(convkernel_size[1])
                        C = int(convkernel_size[2])
                        flatten_size = int(KH * KW * C)

                        Oh = int(bpropFactor.get_shape()[1])
                        Ow = int(bpropFactor.get_shape()[2])

                        if Oh == 1 and Ow == 1 and self._channel_fac:
                            # factorization along the channels
                            # assume independence among input channels
                            # factor = B x 1 x 1 x (KH xKW x C)
                            # patches = B x Oh x Ow x (KH xKW x C)
                            if len(SVD_factors) == 0:
                                if KFAC_DEBUG:
                                    print((
                                        'approx %s act factor with rank-1 SVD factors'
                                        % (var.name)))
                                # find closest rank-1 approx to the feature map
                                S, U, V = tf.batch_svd(
                                    tf.reshape(fpropFactor, [-1, KH * KW, C]))
                                # get rank-1 approx slides
                                sqrtS1 = tf.expand_dims(tf.sqrt(S[:, 0, 0]), 1)
                                patches_k = U[:, :, 0] * sqrtS1  # B x KH*KW
                                full_factor_shape = fpropFactor.get_shape()
                                patches_k.set_shape(
                                    [full_factor_shape[0], KH * KW])
                                patches_c = V[:, :, 0] * sqrtS1  # B x C
                                patches_c.set_shape([full_factor_shape[0], C])
                                SVD_factors[C] = patches_c
                                SVD_factors[KH * KW] = patches_k
                            fpropFactor = SVD_factors[stats_var_dim]

                        else:
                            # poor mem usage implementation
                            patches = tf.extract_image_patches(
                                fpropFactor,
                                ksizes=[
                                    1, convkernel_size[0], convkernel_size[1],
                                    1
                                ],
                                strides=strides,
                                rates=[1, 1, 1, 1],
                                padding=padding)

                            if self._approxT2:
                                if KFAC_DEBUG:
                                    print(('approxT2 act fisher for %s' %
                                           (var.name)))
                                # T^2 terms * 1/T^2, size: B x C
                                fpropFactor = tf.reduce_mean(patches, [1, 2])
                            else:
                                # size: (B x Oh x Ow) x C
                                fpropFactor = tf.reshape(
                                    patches, [-1, flatten_size]) / Oh / Ow
                    fpropFactor_size = int(fpropFactor.get_shape()[-1])
                    if stats_var_dim == (fpropFactor_size +
                                         1) and not self._blockdiag_bias:
                        if opType == 'Conv2D' and not self._approxT2:
                            # correct padding for numerical stability (we
                            # divided out OhxOw from activations for T1 approx)
                            fpropFactor = tf.concat([
                                fpropFactor,
                                tf.ones([tf.shape(fpropFactor)[0], 1]) / Oh /
                                Ow
                            ], 1)
                        else:
                            # use homogeneous coordinates
                            fpropFactor = tf.concat([
                                fpropFactor,
                                tf.ones([tf.shape(fpropFactor)[0], 1])
                            ], 1)

                    # average over the number of data points in a batch
                    # divided by B
                    cov = tf.matmul(fpropFactor, fpropFactor,
                                    transpose_a=True) / tf.cast(B, tf.float32)
                    updateOps.append(cov)
                    statsUpdates[stats_var] = cov
                    if opType != 'Conv2D':
                        # HACK: for convolution we recompute fprop stats for
                        # every layer including forking layers
                        statsUpdates_cache[stats_var] = cov

            for stats_var in bpropStats_vars:
                stats_var_dim = int(stats_var.get_shape()[0])
                if stats_var not in statsUpdates_cache:
                    old_bpropFactor = bpropFactor
                    bpropFactor_shape = bpropFactor.get_shape()
                    B = tf.shape(bpropFactor)[0]  # batch size
                    C = int(bpropFactor_shape[-1])  # num channels
                    if opType == 'Conv2D' or len(bpropFactor_shape) == 4:
                        if fpropFactor is not None:
                            if self._approxT2:
                                if KFAC_DEBUG:
                                    print(('approxT2 grad fisher for %s' %
                                           (var.name)))
                                bpropFactor = tf.reduce_sum(
                                    bpropFactor, [1, 2])  # T^2 terms * 1/T^2
                            else:
                                bpropFactor = tf.reshape(
                                    bpropFactor,
                                    [-1, C]) * Oh * Ow  # T * 1/T terms
                        else:
                            # just doing block diag approx. spatial independent
                            # structure does not apply here. summing over
                            # spatial locations
                            if KFAC_DEBUG:
                                print(('block diag approx fisher for %s' %
                                       (var.name)))
                            bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])

                    # assume sampled loss is averaged. TO-DO:figure out better
                    # way to handle this
                    bpropFactor *= tf.to_float(B)
                    ##

                    cov_b = tf.matmul(bpropFactor,
                                      bpropFactor,
                                      transpose_a=True) / tf.to_float(
                                          tf.shape(bpropFactor)[0])

                    updateOps.append(cov_b)
                    statsUpdates[stats_var] = cov_b
                    statsUpdates_cache[stats_var] = cov_b

        if KFAC_DEBUG:
            aKey = list(statsUpdates.keys())[0]
            statsUpdates[aKey] = tf.Print(statsUpdates[aKey], [
                tf.convert_to_tensor('step:'),
                self.global_step,
                tf.convert_to_tensor('computing stats'),
            ])
        self.statsUpdates = statsUpdates
        return statsUpdates
示例#9
0
def multi_modal_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example a network in tf that has both state and image inputs.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 2
    layer_size = 20
    dim_hidden = (n_layers - 1)*[layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i+dim))
        else:
            x_idx = x_idx + list(range(i, i+dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1]+1]
    image_input = nn_input[:, x_idx[-1]+1:img_idx[-1]+1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, im_width, im_height, num_channels])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width/(2.0*pool_size)*im_height/(2.0*pool_size)*num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    weights = {
        'wc1': get_xavier_weights([filter_size, filter_size, num_channels, num_filters[0]], (pool_size, pool_size)), # 5x5 conv, 1 input, 32 outputs
        'wc2': get_xavier_weights([filter_size, filter_size, num_filters[0], num_filters[1]], (pool_size, pool_size)), # 5x5 conv, 32 inputs, 64 outputs
    }

    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])

    conv_layer_0 = max_pool(conv_layer_0, k=pool_size)

    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])

    conv_layer_1 = max_pool(conv_layer_1, k=pool_size)

    conv_out_flat = tf.reshape(conv_layer_1, [-1, conv_out_size])

    fc_input = tf.concat(concat_dim=1, values=[conv_out_flat, state_input])

    fc_output, _, _ = get_mlp_layers(fc_input, n_layers, dim_hidden)

    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])
示例#10
0
def multi_modal_network_fp(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example a network in tf that has both state and image inputs, with the feature
    point architecture (spatial softmax + expectation).
    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1)*[layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 5

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i+dim))
        else:
            x_idx = x_idx + list(range(i, i+dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1]+1]
    image_input = nn_input[:, x_idx[-1]+1:img_idx[-1]+1]

    # image goes through 3 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    image_input = tf.transpose(image_input, perm=[0,3,2,1])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width/(2.0*pool_size)*im_height/(2.0*pool_size)*num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    with tf.variable_scope('conv_params'):
        weights = {
            'wc1': init_weights([filter_size, filter_size, num_channels, num_filters[0]], name='wc1'), # 5x5 conv, 1 input, 32 outputs
            'wc2': init_weights([filter_size, filter_size, num_filters[0], num_filters[1]], name='wc2'), # 5x5 conv, 32 inputs, 64 outputs
            'wc3': init_weights([filter_size, filter_size, num_filters[1], num_filters[2]], name='wc3'), # 5x5 conv, 32 inputs, 64 outputs
        }

        biases = {
            'bc1': init_bias([num_filters[0]], name='bc1'),
            'bc2': init_bias([num_filters[1]], name='bc2'),
            'bc3': init_bias([num_filters[2]], name='bc3'),
        }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'], strides=[1,2,2,1])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_2 = conv2d(img=conv_layer_1, w=weights['wc3'], b=biases['bc3'])

    _, num_rows, num_cols, num_fp = conv_layer_2.get_shape()
    num_rows, num_cols, num_fp = [int(x) for x in [num_rows, num_cols, num_fp]]
    x_map = np.empty([num_rows, num_cols], np.float32)
    y_map = np.empty([num_rows, num_cols], np.float32)

    for i in range(num_rows):
        for j in range(num_cols):
            x_map[i, j] = (i - num_rows / 2.0) / num_rows
            y_map[i, j] = (j - num_cols / 2.0) / num_cols

    x_map = tf.convert_to_tensor(x_map)
    y_map = tf.convert_to_tensor(y_map)

    x_map = tf.reshape(x_map, [num_rows * num_cols])
    y_map = tf.reshape(y_map, [num_rows * num_cols])

    # rearrange features to be [batch_size, num_fp, num_rows, num_cols]
    features = tf.reshape(tf.transpose(conv_layer_2, [0,3,1,2]),
                          [-1, num_rows*num_cols])
    softmax = tf.nn.softmax(features)

    fp_x = tf.reduce_sum(tf.mul(x_map, softmax), [1], keep_dims=True)
    fp_y = tf.reduce_sum(tf.mul(y_map, softmax), [1], keep_dims=True)

    fp = tf.reshape(tf.concat(1, [fp_x, fp_y]), [-1, num_fp*2])

    fc_input = tf.concat(concat_dim=1, values=[fp, state_input])

    fc_output, weights_FC, biases_FC = get_mlp_layers(fc_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC

    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision, batch_size=batch_size)
    nnet = TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss], fp=fp)
    last_conv_vars = fc_input

    return nnet, fc_vars, last_conv_vars
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0,
                               5.0)
        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                dense(last_out,
                      hid_size,
                      "vffc%i" % (i + 1),
                      weight_init=U.normc_initializer(1.0)))
        self.vpred = dense(last_out,
                           1,
                           "vffinal",
                           weight_init=U.normc_initializer(1.0))[:, 0]

        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                dense(last_out,
                      hid_size,
                      "polfc%i" % (i + 1),
                      weight_init=U.normc_initializer(1.0)))

        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            mean = dense(last_out,
                         pdtype.param_shape()[0] // 2, "polfinal",
                         U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, pdtype.param_shape()[0] // 2],
                                     initializer=tf.zeros_initializer())
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = dense(last_out,
                            pdtype.param_shape()[0], "polfinal",
                            U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        # change for BC
        stochastic = U.get_placeholder(name="stochastic",
                                       dtype=tf.bool,
                                       shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self.ac = ac
        self._act = U.function([stochastic, ob], [ac, self.vpred])
def flatten_grads(var_list, grads):
    """Flattens a variables and their gradients.
    """
    return tf.concat(
        [tf.reshape(grad, [U.numel(v)]) for (v, grad) in zip(var_list, grads)],
        0)
 def __call__(self, obs, action, reuse=False):
     with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
         x = tf.concat([obs, action], axis=-1) # this assumes observation and action can be concatenated
         x = self.network_builder(x)
         x = tf.layers.dense(x, 1, kernel_initializer=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3))
     return x