Example #1
 def create_somenet_from_feature(self,
                                 hidden,
                                 netname,
                                 elu,
                                 lstm,
                                 initialized_as_zero=False,
                                 nolu_at_final=False,
                                 batch_normalization=None):
     # featvec = self.cur_featvec
     '''
     Note: the flattened multi-view feature vector [B, 1, F*V] should be used here.
     '''
     featvec = self.cur_mvfeatvec
     if lstm is True:
         featvec = self.get_lstm_featvec('LSTM', featvec)
     net = vision.ConvApplier(None,
                              hidden,
                              netname,
                              elu,
                              initialized_as_zero=initialized_as_zero,
                              nolu_at_final=nolu_at_final,
                              batch_normalization=batch_normalization)
     _, out = net.infer(featvec)
     '''
     TODO: Check if this also returns the LSTM parameters (probably not, since they live under the 'LSTM' scope)
     '''
     params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope=netname)
     print('{} Params {}'.format(netname, params))
     print('LSTM Params {}'.format(self.acquire_lstm_params()))
     return out, params, [net]
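For reference, the flattened multi-view feature vector mentioned in the docstring is just a reshape of the per-view features. A minimal NumPy sketch of that shape change; the array names and sizes are illustrative and not part of the original class:

import numpy as np

B, V, F = 2, 3, 8                       # batch, views, per-view feature width
per_view = np.random.rand(B, V, F)      # per-view features, shape [B, V, F]
joint = per_view.reshape(B, 1, V * F)   # flattened multi-view vector, [B, 1, F*V]
assert joint.shape == (2, 1, 24)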
Example #2
 def get_forward_model(self, jointfw=False, output_fn=-1):
     if self.forward_output_tensor is not None:
         return self.forward_model_params, self.forward_output_tensor
     '''
     Our pipeline uses a [None, V, N] feature vector;
     this 3D tensor unifies per-view tensors and combined-view tensors.
     '''
     if jointfw:
         '''
         Joint prediction of V views.
         Expecting better accuracy with a V^2-sized network.
         '''
         V = int(self.cur_featvec.shape[1])
         N = int(self.cur_featvec.shape[2])
         input_featvec = tf.reshape(self.cur_featvec, [-1, 1, V * N])
         atensor = self.action_tensor
         name = 'JointForwardModelNet'
     else:
         '''
         Without --jointfw, get_local_action() generates the action in the local
         coordinate system, and get_forward_model() then predicts each view individually.
         '''
         atensor = self.get_local_action(self.action_tensor)
         input_featvec = self.cur_featvec
         name = 'ForwardModelNet'
     featnums = list(self.fwhidden_params)  # copy: += below must not grow self.fwhidden_params
     if output_fn <= 0:
         featnums += [int(input_featvec.shape[-1])]
     else:
         featnums += [output_fn]
     featvec_plus_action = tf.concat([atensor, input_featvec], 2)
     self.forward_fc_applier = vision.ConvApplier(None,
                                                  featnums,
                                                  name,
                                                  self.elu,
                                                  nolu_at_final=True)
     # FIXME: ConvApplier.infer returns tuples, which is unsuitable for Optimizer
     params, out = self.forward_fc_applier.infer(featvec_plus_action)
     if jointfw:
         if output_fn <= 0:
             out = tf.reshape(out, [-1, V, N])
         else:
             pass  # Return [-1, 1, output_fn] as requested
     self.forward_model_params = params
     self.forward_output_tensor = out
     print('FWD Params {}'.format(params))
     params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
     print('FWD Collected Params {}'.format(params))
     return params, out
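A shape-level sketch of the forward-model wiring above: the action is concatenated to the feature vector along the last axis, and in the --jointfw case the prediction is folded back to per-view form. Sizes and names below are made up for illustration:

import numpy as np

B, V, N, A = 4, 3, 8, 6
feat = np.random.rand(B, V, N)                         # cur_featvec, [None, V, N]
joint_feat = feat.reshape(B, 1, V * N)                 # the --jointfw reshape
action = np.random.rand(B, 1, A)                       # action tensor
fwd_in = np.concatenate([action, joint_feat], axis=2)  # mirrors tf.concat([...], 2)
assert fwd_in.shape == (B, 1, A + V * N)
pred = np.random.rand(B, 1, V * N)                     # stand-in for the net output
per_view_pred = pred.reshape(B, V, N)                  # mirrors tf.reshape(out, [-1, V, N])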
Example #3
    def __init__(self, learning_rate, args, batch_normalization=None):
        self._debug_trans_only = 'tunnel_finder_trans_only' in args.debug_flags

        self.view_num, self.views = rlutil.get_view_cfg(args)
        w = h = args.res
        self.args = args
        self.batch_normalization = batch_normalization
        batch_size = None if args.EXPLICIT_BATCH_SIZE < 0 else args.EXPLICIT_BATCH_SIZE
        self.batch_size = batch_size
        # tf.reshape does not accept None as dimension
        self.reshape_batch_size = -1 if batch_size is None else batch_size

        common_shape = [batch_size, self.view_num, w, h]
        self.action_space_dimension = 6  # Magic number, 3D + Axis Angle
        self.pred_action_size = (self.action_space_dimension *
                                 self.get_number_of_predictions())
        action_shape = self._get_action_placeholder_shape(batch_size)
        self.action_tensor = tf.placeholder(tf.float32,
                                            shape=action_shape,
                                            name='CActionPh')
        self.rgb_tensor = tf.placeholder(tf.float32,
                                         shape=common_shape + [3],
                                         name='RgbPh')
        self.dep_tensor = tf.placeholder(tf.float32,
                                         shape=common_shape + [1],
                                         name='DepPh')

        assert self.view_num == 1 or args.sharedmultiview, "must be shared multiview, or single view"
        assert args.ferev in [11, 13], 'Assumes --ferev 11 or 13 (implied by --visionformula)'
        # Let's try ResNet 18 True
        self.feature_extractor = vision.FeatureExtractorResNet(
            config.SV_RESNET18_TRUE,
            args.fehidden + [args.featnum],
            'VisionNetRev13',
            args.elu,
            batch_normalization=batch_normalization)
        self._vision_params, self.featvec = self.feature_extractor.infer(
            self.rgb_tensor, self.dep_tensor)
        B, V, N = self.featvec.shape
        self.joint_featvec = tf.reshape(self.featvec, [-1, 1, int(V) * int(N)])

        naming = 'TunnelFinderNet'
        self._finder_net = vision.ConvApplier(
            None, args.polhidden + [self.pred_action_size], naming, args.elu)
        self._finder_params, self.finder_pred = self._finder_net.infer(
            self.joint_featvec)
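The reshape_batch_size bookkeeping in this constructor exists because tf.reshape cannot take None as a dimension, so -1 is passed and the batch dimension is inferred instead. A minimal NumPy illustration of the same -1 convention (array name and sizes invented):

import numpy as np

V, N = 3, 8
featvec = np.random.rand(5, V, N)      # batch size only known at run time
joint = featvec.reshape(-1, 1, V * N)  # -1 lets reshape infer the batch dimension
assert joint.shape == (5, 1, V * N)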
Example #4
 def get_inverse_model(self):
     if self.inverse_output_tensor is not None:
         return self.inverse_model_params, self.inverse_output_tensor
     input_featvec = tf.concat([self.cur_featvec, self.next_featvec], 2)
     print('inverse_model input {}'.format(input_featvec))
     # featnums = [config.INVERSE_MODEL_HIDDEN_LAYER, int(self.action_tensor.shape[-1])]
     # featnums = config.INVERSE_MODEL_HIDDEN_LAYER + [int(self.action_tensor.shape[-1])]
     featnums = self.imhidden_params + [self.action_space_dimension]
     print('inverse_model featnums {}'.format(featnums))
     self.inverse_fc_applier = vision.ConvApplier(None, featnums,
                                                  'InverseModelNet',
                                                  self.elu)
     params, out = self.inverse_fc_applier.infer(input_featvec)
     self.inverse_model_params = params
     self.inverse_output_tensor = self.vote(out)
     # return the voted tensor so the memoized early return above stays consistent
     return params, self.inverse_output_tensor
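Shape-wise, the inverse model stacks the features of the current and next frames along the last axis and regresses the action from that pair. A small sketch with made-up sizes:

import numpy as np

B, V, N = 4, 3, 8
cur_feat = np.random.rand(B, V, N)
next_feat = np.random.rand(B, V, N)
inv_in = np.concatenate([cur_feat, next_feat], axis=2)  # mirrors tf.concat([...], 2)
assert inv_in.shape == (B, V, 2 * N)
# the ConvApplier then maps [B, V, 2N] to [B, V, action_space_dimension]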
Example #5
 def create_somenet_from_feature(self, hidden, netname):
     outs = []
     nets = []
     paramss = []
     for i in range(self.view_num):
         icm = self.icms[i]
         with tf.variable_scope(netname):
             vsn = 'View_{}'.format(i)
             featvec = icm.cur_featvec
             nets.append(vision.ConvApplier(None, hidden, vsn, self.elu))
             net_params, out = nets[-1].infer(featvec)
             outs.append(tf.nn.softmax(out))  # per-view softmax, summed below
             paramss.append(net_params)
     out = tf.add_n(outs)
     params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope=netname)
     return out, params, nets
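Each per-view head emits a softmax and tf.add_n sums them, so the views effectively vote on the output. A NumPy sketch of that voting step; the softmax helper and the sizes are chosen only for illustration:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

view_logits = [np.random.rand(2, 1, 4) for _ in range(3)]  # 3 views, 4 outputs
votes = sum(softmax(l) for l in view_logits)               # mirrors tf.add_n(outs)
consensus = votes.argmax(axis=-1)                          # strongest joint vote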
Example #6
    def __init__(self, learning_rate, args, batch_normalization=None):
        super(TunnelFinderTwin1,
              self).__init__(learning_rate=learning_rate,
                             args=args,
                             batch_normalization=batch_normalization)

        self.coarse_action = self.action_tensor
        self.coarse_rgb = self.rgb_tensor
        self.coarse_dep = self.dep_tensor
        self.fine_action = self.action_tensor
        self.fine_rgb = self.rgb_tensor
        self.fine_dep = self.dep_tensor

        self._coarse_net = self._finder_net
        self.coarse_pred = self.finder_pred
        naming = 'TunnelFinderNet_Fine'
        self._fine_net = vision.ConvApplier(
            None, args.polhidden + [self.action_space_dimension], naming,
            args.elu)
        self._fine_params, self.fine_pred = self._fine_net.infer(
            self.joint_featvec)
Example #7
        adists[i, 0, actions[i]] = 1.0
    return adists


# GT Input
action_tensor = tf.placeholder(tf.float32,
                               shape=[BSize, 1, action_space_dimension],
                               name='ActionPh')
V_tensor = tf.placeholder(tf.float32, shape=[BSize], name='VPh')
# Feature Vector
featvec = tf.placeholder(tf.float32, shape=[BSize, 1, FEAT], name='FeatVecPh')

# pi and value net
pi_net = vision.ConvApplier(None, [HIDDEN, HIDDEN, action_space_dimension],
                            'PiNet',
                            elu=True,
                            initialized_as_zero=False,
                            nolu_at_final=True,
                            batch_normalization=None)
_, raw_pi = pi_net.infer(featvec)
print("raw_pi {}".format(raw_pi.shape))
value_net = vision.ConvApplier(None, [HIDDEN, HIDDEN, 1],
                               'ValueNet',
                               elu=True,
                               initialized_as_zero=False,
                               nolu_at_final=True,
                               batch_normalization=None)
_, raw_value = value_net.infer(featvec)
print("raw_value {}".format(raw_value.shape))

if len(actionset) != action_space_dimension:
    # Selective softmax over the reduced action set (construction truncated in this snippet)
    pass
Example #8
    def __init__(self, learning_rate, args, master=None, shadowid=-1):
        super(TabularRL, self).__init__()
        self.h = self.w = args.res * 2 + 1  # [-res, -res] to [res, res]
        self.mani = args.mani

        self.action_tensor = tf.placeholder(tf.float32,
                                            shape=[None, 1, 4],
                                            name='ActionPh')
        self.rgb_1_tensor = tf.placeholder(tf.int32,
                                           shape=[None, 2],
                                           name='Rgb1Ph')
        self.dep_1_tensor = tf.placeholder(tf.int32,
                                           shape=[None],
                                           name='Dep1Ph')
        self.rgb_2_tensor = tf.placeholder(tf.int32,
                                           shape=[None, 2],
                                           name='Rgb2Ph')
        self.dep_2_tensor = tf.placeholder(tf.int32,
                                           shape=[None],
                                           name='Dep2Ph')

        if shadowid >= 0:
            vprefix = 'Shadow{}_'.format(shadowid)
        else:
            vprefix = ''

        frgb_1 = tf.cast(self.rgb_1_tensor, tf.float32)
        frgb_2 = tf.cast(self.rgb_2_tensor, tf.float32)

        if not self.mani:
            self.polparams = tf.get_variable(
                vprefix + 'polgrid',
                shape=[self.w, self.h, 4],
                dtype=tf.float32,
                initializer=tf.zeros_initializer())
            self.valparams = tf.get_variable(
                vprefix + 'valgrid',
                shape=[self.w, self.h],
                dtype=tf.float32,
                initializer=tf.zeros_initializer())
            self.smpolparams = tf.nn.softmax(logits=self.polparams)

            self.indices = tf.reshape(self.rgb_1_tensor, [-1, 2]) + args.res
            self.polout = tf.gather_nd(self.smpolparams, indices=self.indices)
            print('polout shape {}'.format(self.polout.shape))
            self.valout = tf.gather_nd(self.valparams, indices=self.indices)
            self.smpol = tf.reshape(self.polout, [-1, 1, 4])
        else:
            febase = [64, 64]
            featnums = febase + [4]
            self.pol_applier = vision.ConvApplier(None,
                                                  featnums,
                                                  vprefix + 'PolMani',
                                                  elu=True,
                                                  nolu_at_final=True)
            self.polparams, self.polout = self.pol_applier.infer(frgb_1)
            featnums = febase + [1]
            self.val_applier = vision.ConvApplier(None,
                                                  featnums,
                                                  vprefix + 'ValMani',
                                                  elu=True,
                                                  nolu_at_final=True)
            self.valparams, self.valout = self.val_applier.infer(frgb_1)
            self.smpol = tf.nn.softmax(tf.reshape(self.polout, [-1, 1, 4]))
            rgb_list = []
            for x in range(-args.res, args.res + 1):
                for y in range(-args.res, args.res + 1):
                    rgb_list.append([x, y])
            self.pvgrid = np.array(rgb_list, dtype=np.int32)

        atensor = tf.reshape(self.action_tensor, [-1, 4])
        fwd_input = tf.concat([atensor, frgb_1], 1)
        featnums = [64, 2]
        self.forward_applier = vision.ConvApplier(None,
                                                  featnums,
                                                  vprefix + 'ForwardModelNet',
                                                  elu=True,
                                                  nolu_at_final=True)
        self.forward_params, self.forward = self.forward_applier.infer(
            fwd_input)
        # Per action curiosity
        print('FWD {}'.format(self.forward.shape))
        print('FRGB_2 {}'.format(frgb_2.shape))
        self.sqdiff = tf.squared_difference(self.forward, frgb_2)
        print('SQDIFF {}'.format(self.sqdiff.shape))
        self.curiosity = tf.reduce_mean(self.sqdiff, axis=[1], keepdims=False)

        print('> Polout {} Valout {} Curiosity {}'.format(
            self.polout.shape, self.valout.shape, self.curiosity.shape))
        self.loss = None
        self.debug = args.debug
        self.cr = args.cr
        self.master = master

        ws = self.get_weights()
        if master is not None:
            mws = self.master.get_weights()
            self.download_op = [
                tf.assign(target, source) for target, source in zip(ws, mws)
            ]
        else:
            self.grads_in1 = [
                tf.placeholder(tensor.dtype, shape=tensor.shape)
                for tensor in ws
            ]
            self.grads_in2 = [
                tf.placeholder(tensor.dtype, shape=tensor.shape)
                for tensor in ws
            ]
            self.grads_op = [
                tf.assign_add(var, in2 - in1)
                for var, in1, in2 in zip(ws, self.grads_in1, self.grads_in2)
            ]
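The master/worker bookkeeping above synchronizes weights by deltas: a worker copies the master weights (download_op), trains locally, and the master then adds (new - old) to its own variables through grads_op. A NumPy sketch of that delta update, with every name invented for illustration:

import numpy as np

master_w = [np.zeros(3), np.ones((2, 2))]   # master weights
snapshot = [w.copy() for w in master_w]     # worker downloads (download_op)
local = [w + 0.1 for w in snapshot]         # worker trains locally
# master applies the deltas, mirroring tf.assign_add(var, in2 - in1)
master_w = [m + (new - old) for m, old, new in zip(master_w, snapshot, local)]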
Example #9
        h = r.pbufferHeight

        # Action taken
        self.a3c_batch_a_tensor = tf.placeholder(tf.float32, shape=[None, self.action_size])
        self.rgb1_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 3])
        self.rgb2_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 3])
        self.dep1_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 1])
        self.dep2_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 1])
        self.icm = icm.IntrinsicCuriosityModule(
                self.a3c_batch_a_tensor,
                self.rgb1_tensor, self.dep1_tensor,
                self.rgb2_tensor, self.dep2_tensor,
                svconfdict, mvconfdict,
                mv_featnum)

        self.policy_applier = vision.ConvApplier(None, config.POLICY_HIDDEN_LAYERS + [output_number])
        params, out = self.policy_applier.infer(self.icm.cur_featvec)
        self.policy_fc_params = params
        self.policy_params = self.cur_nn_params + params
        self.policy_out = out
        print('RLDRIVER POLICY PARAMS {}'.format(self.policy_params))
        print('RLDRIVER POLICY OUTPUT {}'.format(self.policy_out))

        self.value_applier = vision.ConvApplier(None, config.VALUE_HIDDEN_LAYERS + [1])
        params, out = self.value_applier.infer(self.icm.cur_featvec)
        self.value_fc_params = params
        self.value_params = self.cur_nn_params + params
        self.value_out = out
        print('RLDRIVER VALUE PARAMS {}'.format(self.value_params))
        print('RLDRIVER VALUE OUTPUT {}'.format(self.value_out))
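The parameter lists collected here are what would be handed to an optimizer's var_list so that only the policy (or value) variables get updated; Example #2 works around ConvApplier.infer's tuple return by collecting variables from the scope in the same way. A hedged TensorFlow 1.x sketch; the loss, placeholders, and scope name are my own and not part of the snippet:

import tensorflow as tf  # TensorFlow 1.x API, matching the snippets above

feat_ph = tf.placeholder(tf.float32, shape=[None, 8], name='FeatPh')
value_target = tf.placeholder(tf.float32, shape=[None, 1], name='ValueTargetPh')
value_out = tf.layers.dense(feat_ph, 1, name='ValueNet')
value_loss = tf.reduce_mean(tf.squared_difference(value_out, value_target))
value_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='ValueNet')
train_op = tf.train.AdamOptimizer(1e-4).minimize(value_loss, var_list=value_params)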