def embedding_lookup(t):
    # `reuse`, `scope`, `is_train`, and `embedding_dim` are expected to be
    # defined in the enclosing builder (closure variables).
    if not reuse: log.warning(scope.name)
    _ = fc(t, int(embedding_dim / 4), is_train, info=not reuse, name='fc1')
    _ = fc(_, embedding_dim, is_train, info=not reuse, name='fc2')
    return _
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    with tf.variable_scope(scope):
        conv_1 = conv2d(img, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_1')
        conv_2 = conv2d(conv_1, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_2')
        conv_3 = conv2d(conv_2, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_3')

        # Append the question into image features
        conv_q = tf.concat([tf.reshape(conv_3, [tf.shape(conv_3)[0], -1]), q], axis=1)

        fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1')
        fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
        fc_2 = slim.dropout(fc_2, keep_prob=0.5,
                            is_training=self.is_training, scope='fc_3/')
        logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')
        return logits
def Token_Decoder(f, token_dim, scope='Token_Decoder', reuse=False):
    with tf.variable_scope(scope, reuse=reuse) as scope:
        if not reuse: log.warning(scope.name)
        _ = fc(f, token_dim, is_train, info=not reuse, batch_norm=False,
               activation_fn=None, name='fc1')
        return _
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    with tf.variable_scope(scope):
        ###################### MODIFY HERE ######################
        ## Compute film(q) for gamma, beta
        conv_1 = conv2d(img, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_1')
        ## Affine transform of conv_1
        conv_2 = conv2d(conv_1, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_2')
        ## Affine transform of conv_2
        conv_3 = conv2d(conv_2, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_3')
        ## Affine transform of conv_3
        #########################################################

        features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
        fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
        fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
        fc_2 = slim.dropout(fc_2, keep_prob=0.5,
                            is_training=self.is_training, scope='fc_3/')
        logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')
        return logits
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    with tf.variable_scope(scope):
        ###################### MODIFY HERE ######################
        def film(q, scope='film'):
            # Predict 3 layers x (gamma, beta) x 24 channels from the question
            with tf.variable_scope(scope):
                cond = fc(q, 3 * 2 * 24, activation_fn=None, name='cond')
                cond = tf.reshape(cond, [-1, 3, 2, 24])
                return cond

        def modulate(conv, gamma, beta):
            # Per-channel affine transform broadcast over spatial dimensions
            gamma = tf.reshape(gamma, [-1, 1, 1, 24])
            beta = tf.reshape(beta, [-1, 1, 1, 24])
            return (1 + gamma) * conv + beta

        q_embed = fc(q, 256, name='fc_q')
        cond = film(q_embed)

        conv_1 = conv2d(img, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_1')
        conv_1 = modulate(conv_1, cond[:, 0, 0, :], cond[:, 0, 1, :])
        conv_2 = conv2d(conv_1, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_2')
        conv_2 = modulate(conv_2, cond[:, 1, 0, :], cond[:, 1, 1, :])
        conv_3 = conv2d(conv_2, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_3')
        conv_3 = modulate(conv_3, cond[:, 2, 0, :], cond[:, 2, 1, :])
        #########################################################

        features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
        fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
        fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
        fc_2 = slim.dropout(fc_2, keep_prob=0.5,
                            is_training=self.is_training, scope='fc_3/')
        logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')
        return logits
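# A minimal NumPy sketch (an illustration, not part of the original graph) of
# what `modulate` above computes: each conditioning slot i yields a per-channel
# (gamma, beta) pair that scales and shifts the feature maps, broadcast over
# the spatial dimensions, i.e. FiLM(x) = (1 + gamma) * x + beta.
import numpy as np

batch, height, width, channels = 2, 8, 8, 24
conv = np.random.randn(batch, height, width, channels)   # conv feature maps
gamma = np.random.randn(batch, channels)                  # plays the role of cond[:, i, 0, :]
beta = np.random.randn(batch, channels)                   # plays the role of cond[:, i, 1, :]

modulated = (1 + gamma[:, None, None, :]) * conv + beta[:, None, None, :]
assert modulated.shape == conv.shape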
def State_Encoder(s, per, batch_size, scope='State_Encoder', reuse=False):
    with tf.variable_scope(scope, reuse=reuse) as scope:
        if not reuse: log.warning(scope.name)
        _ = conv2d(s, 16, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv1')
        _ = conv2d(_, 32, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv2')
        _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv3')
        if self.pixel_input:
            _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                       batch_norm=True, name='conv4')
            _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                       batch_norm=True, name='conv5')
        state_feature = tf.reshape(_, [batch_size, -1])
        if self.state_encoder_fc:
            state_feature = fc(state_feature, 512, is_train,
                               info=not reuse, name='fc1')
            state_feature = fc(state_feature, 512, is_train,
                               info=not reuse, name='fc2')
        state_feature = tf.concat([state_feature, per], axis=-1)
        if not reuse:
            log.info('concat feature {}'.format(state_feature))
        return state_feature
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    def _positional_encoding(features):
        # Append two features of positional encoding to the given feature maps
        d = features.get_shape().as_list()[1]
        indices = tf.range(d)
        x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
        y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
        pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
        pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
        return tf.concat([features, pos], axis=3)

    def f_phi(g, scope='f_phi'):
        with tf.variable_scope(scope):
            fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_1 = slim.dropout(fc_1, keep_prob=0.5,
                                is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
            return logits

    with tf.variable_scope(scope):
        conv_1 = conv2d(img, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_1')
        conv_2 = conv2d(conv_1, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_2')
        conv_3 = conv2d(conv_2, 24, self.is_training,
                        activation_fn=tf.nn.relu, name='conv_3')  # (b, d, d, c)

        # Adding positional information (x, y) into features: size (b, d, d, c+2)
        conv_pos = _positional_encoding(conv_3)

        ###################### MODIFY HERE ######################
        conv_q = tf.concat([tf.reshape(conv_pos, [tf.shape(conv_pos)[0], -1]), q], axis=1)
        fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1')
        all_g = fc_1
        #########################################################

        logits = f_phi(all_g)
        return logits
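# A minimal NumPy sketch (an illustration mirroring `_positional_encoding`
# above, not part of the original graph): two extra channels holding the
# normalized (row, column) coordinate of each spatial cell are concatenated to
# the feature maps, so downstream layers can tell where a feature came from.
import numpy as np

batch, d, channels = 2, 4, 24
features = np.random.randn(batch, d, d, channels)

idx = np.arange(d)
x = np.tile(idx[:, None], [1, d])          # x[i, j] = i (row index)
y = np.tile(idx[None, :], [d, 1])          # y[i, j] = j (column index)
pos = np.stack([x, y], axis=2)[None] / d   # shape (1, d, d, 2), values in [0, 1)
pos = np.tile(pos, [batch, 1, 1, 1])

augmented = np.concatenate([features, pos], axis=3)
assert augmented.shape == (batch, d, d, channels + 2)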
def f_phi(g, scope='f_phi'):
    with tf.variable_scope(scope):
        fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
        fc_1 = slim.dropout(fc_1, keep_prob=0.5,
                            is_training=self.is_training, scope='fc_3/')
        logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
        return logits
def film(q, scope='film'):
    with tf.variable_scope(scope):
        cond = fc(q, 3 * 2 * 24, activation_fn=None, name='cond')
        cond = tf.reshape(cond, [-1, 3, 2, 24])
        return cond
def g_theta(o_i, o_j, q, scope='g_theta', reuse=True):
    with tf.variable_scope(scope, reuse=reuse):
        g_1 = fc(tf.concat([o_i, o_j, q], axis=1), 256, name='g_1')
        g_2 = fc(g_1, 256, name='g_2')
        return g_2
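# A minimal sketch (an assumption, not the original code) of how a g_theta-style
# pairwise function is typically aggregated in a Relation Network: every ordered
# pair of object features (o_i, o_j) is processed together with the question
# embedding q, and the per-pair outputs are summed before being fed to f_phi.
# Here `g` is a stubbed NumPy MLP standing in for g_theta so the aggregation
# logic is runnable on its own.
import numpy as np

num_objects, obj_dim, q_dim, g_dim = 5, 26, 11, 256
objects = np.random.randn(num_objects, obj_dim)   # e.g. rows of conv_pos features
q = np.random.randn(q_dim)
W = np.random.randn(2 * obj_dim + q_dim, g_dim)   # stub weights for the pair MLP

def g(o_i, o_j, q):
    # ReLU(fc([o_i, o_j, q])) as a single stubbed layer
    return np.maximum(np.concatenate([o_i, o_j, q]) @ W, 0.)

# Sum g over all ordered object pairs; the result would feed f_phi.
all_g = sum(g(objects[i], objects[j], q)
            for i in range(num_objects) for j in range(num_objects))
assert all_g.shape == (g_dim,)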