def build_model():
    model = Sequential()
    model.add(
        layers.Dense(5, activation='sigmoid',
                     input_shape=(train_data.shape[1],)))
    model.add(
        tfp.layers.VariationalGaussianProcess(
            num_inducing_points=num_inducing_points,
            kernel_provider=RBFKernelFn(),
            event_shape=[2],  # output shape
            # One row of inducing points per output dimension.
            # (tf.constant_initializer takes a single value, so the two
            # linspace rows must be wrapped in one list.)
            inducing_index_points_initializer=tf.constant_initializer(
                [np.linspace(*x_range, num_inducing_points, dtype='float32'),
                 np.linspace(*x_range, num_inducing_points, dtype='float32')]),
            unconstrained_observation_noise_variance_initializer=(
                tf.constant_initializer(noise))))
    model.compile(optimizer=optimizers.Adam(learning_rate=0.01),
                  loss='mse',
                  metrics=['mae', 'mse'])
    return model
def __init__(self, num_classes, per_class_kernel, initial_linear_bias,
             initial_linear_slope, name='vgp_kernel', **kwargs):
    super(LinearKernelFn, self).__init__(**kwargs)
    self._per_class_kernel = per_class_kernel
    self._initial_linear_bias = initial_linear_bias
    self._initial_linear_slope = initial_linear_slope
    with tf.compat.v1.variable_scope(name):
        if self._per_class_kernel and num_classes > 1:
            shape = (num_classes,)
        else:
            shape = ()
        self._linear_bias = self.add_variable(
            initializer=tf.constant_initializer(self._initial_linear_bias),
            shape=shape,
            name='linear_bias')
        self._linear_slope = self.add_variable(
            initializer=tf.constant_initializer(self._initial_linear_slope),
            shape=shape,
            name='linear_slope')
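# The variables above feed a linear kernel; a sketch of a matching `kernel`
# property using TFP's Linear PSD kernel (this mapping is an assumption --
# the original property is not shown -- and softplus keeps the variances
# positive):
@property
def kernel(self):
    return tfp.math.psd_kernels.Linear(
        bias_variance=tf.nn.softplus(self._linear_bias),
        slope_variance=tf.nn.softplus(self._linear_slope))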
def __init__(self, num_classes, degree, per_class_kernel, feature_size,
             initial_amplitude, initial_length_scale, initial_linear_bias,
             initial_linear_slope, add_linear=False, name='vgp_kernel',
             **kwargs):
    super(MaternKernelFn, self).__init__(**kwargs)
    self._per_class_kernel = per_class_kernel
    self._initial_linear_bias = initial_linear_bias
    self._initial_linear_slope = initial_linear_slope
    self._add_linear = add_linear
    if degree not in [1, 3, 5]:
        raise ValueError(
            'Matern degree must be one of [1, 3, 5]: {}'.format(degree))
    self._degree = degree
    with tf.compat.v1.variable_scope(name):
        if self._per_class_kernel and num_classes > 1:
            amplitude_shape = (num_classes,)
            length_scale_shape = (num_classes, feature_size)
        else:
            amplitude_shape = ()
            length_scale_shape = (feature_size,)
        self._amplitude = self.add_variable(
            initializer=tf.constant_initializer(initial_amplitude),
            shape=amplitude_shape,
            name='amplitude')
        self._length_scale = self.add_variable(
            initializer=tf.constant_initializer(initial_length_scale),
            shape=length_scale_shape,
            name='length_scale')
        if self._add_linear:
            self._linear_bias = self.add_variable(
                initializer=tf.constant_initializer(self._initial_linear_bias),
                shape=amplitude_shape,
                name='linear_bias')
            self._linear_slope = self.add_variable(
                initializer=tf.constant_initializer(self._initial_linear_slope),
                shape=amplitude_shape,
                name='linear_slope')
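# The `degree` argument selects the Matern smoothness. A minimal sketch of how
# a kernel provider might map it onto TFP's positive-semidefinite kernels (the
# mapping and helper below are assumptions; the original class body beyond
# `__init__` is not shown):
import tensorflow as tf
import tensorflow_probability as tfp

_MATERN_KERNELS = {
    1: tfp.math.psd_kernels.MaternOneHalf,      # degree 1 -> nu = 1/2
    3: tfp.math.psd_kernels.MaternThreeHalves,  # degree 3 -> nu = 3/2
    5: tfp.math.psd_kernels.MaternFiveHalves,   # degree 5 -> nu = 5/2
}

def matern_kernel_for(degree, amplitude, length_scale):
    """Builds a Matern kernel; softplus keeps both parameters positive."""
    return _MATERN_KERNELS[degree](
        amplitude=tf.nn.softplus(amplitude),
        length_scale=tf.nn.softplus(length_scale))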
def __init__(self, **kwargs):
    super(RBFKernelFn, self).__init__(**kwargs)
    dtype = kwargs.get('dtype', None)
    self._amplitude = self.add_variable(
        initializer=tf.constant_initializer(0),
        dtype=dtype,
        name='amplitude')
    self._length_scale = self.add_variable(
        initializer=tf.constant_initializer(0),
        dtype=dtype,
        name='length_scale')
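# `RBFKernelFn` is only a container for trainable kernel parameters; the VGP
# layer reads them through a `kernel` property. A sketch of the remaining
# methods, following the TensorFlow Probability VGP example (the 0.1 and 5.0
# softplus scalings are that example's choice, not a requirement):
def call(self, x):
    # Never called -- the layer exists only to hold variables.
    return x

@property
def kernel(self):
    return tfp.math.psd_kernels.ExponentiatedQuadratic(
        amplitude=tf.nn.softplus(0.1 * self._amplitude),
        length_scale=tf.nn.softplus(5. * self._length_scale))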
def build(self, unused_input_shape):
    """Initialize impulse response."""
    if self.trainable:
        self._gain = self.add_weight(
            name='gain',
            shape=[1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(2.0))
        self._decay = self.add_weight(
            name='decay',
            shape=[1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(4.0))
    self.built = True
def build(self, input_shape):
    """Initialize impulse response."""
    super(ExpDecayReverb, self).build(input_shape)
    if self.trainable:
        self._gain = self.add_weight(
            name='gain',
            shape=[1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(2.0))
        self._decay = self.add_weight(
            name='decay',
            shape=[1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(4.0))
def MnistTeacher(input, keep_prob_conv, keep_prob_hidden, scope='Mnist',
                 reuse=False):
    with tf2.compat.v1.variable_scope(scope, reuse=reuse) as sc:
        with slim.arg_scope([slim.conv2d], kernel_size=[3, 3], stride=[1, 1],
                            biases_initializer=tf2.constant_initializer(0.0),
                            activation_fn=tf2.nn.relu):
            net = slim.conv2d(input, 32, scope='conv1')
            net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
            net = tf2.nn.dropout(net, keep_prob_conv)
            net = slim.conv2d(net, 64, scope='conv2')
            net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
            net = tf2.nn.dropout(net, keep_prob_conv)
            net = slim.conv2d(net, 128, scope='conv3')
            net = slim.max_pool2d(net, [2, 2], 2, scope='pool3')
            net = tf2.nn.dropout(net, keep_prob_conv)
            net = slim.flatten(net)
        with slim.arg_scope([slim.fully_connected],
                            biases_initializer=tf2.constant_initializer(0.0),
                            activation_fn=tf2.nn.relu):
            net = slim.fully_connected(net, 625, scope='fc1')
            net = tf2.nn.dropout(net, keep_prob_hidden)
            net = slim.fully_connected(net, 10, activation_fn=None,
                                       scope='fc2')
            # Temperature-scaled softmax for distillation; `temperature` is a
            # global in this snippet. Passing it as the second positional
            # argument of tf.nn.softmax would be interpreted as `axis`.
            net = tf2.nn.softmax(net / temperature)
            return net
def _build_class_net_layers(self, batch_norm_relu):
    """Build re-usable layers for class prediction network."""
    self._class_predict = tf.keras.layers.Conv2D(
        self._num_classes * self._anchors_per_location,
        kernel_size=(3, 3),
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=1e-5),
        padding='same',
        name='class-predict')
    self._class_conv = []
    self._class_batch_norm_relu = {}
    for i in range(self._num_convs):
        self._class_conv.append(
            tf.keras.layers.Conv2D(
                self._num_filters,
                kernel_size=(3, 3),
                bias_initializer=tf.zeros_initializer(),
                kernel_initializer=tf.keras.initializers.RandomNormal(
                    stddev=0.01),
                activation=None,
                padding='same',
                name='class-' + str(i)))
        for level in range(self._min_level, self._max_level + 1):
            name = self._class_net_batch_norm_name(i, level)
            self._class_batch_norm_relu[name] = batch_norm_relu(name=name)
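# Why -np.log((1 - 0.01) / 0.01)? Initializing the class-prediction bias this
# way makes the initial sigmoid output about 0.01, the focal-loss prior from
# the RetinaNet paper, so the untrained network predicts "background" almost
# everywhere. A quick standalone check:
import numpy as np
import tensorflow as tf

bias = -np.log((1 - 0.01) / 0.01)    # approx. -4.595
print(tf.sigmoid(bias).numpy())      # approx. 0.01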
def MnistStudent(input, scope='Mnist', reuse=False):
    with tf2.compat.v1.variable_scope(scope, reuse=reuse) as sc:
        with slim.arg_scope([slim.fully_connected],
                            biases_initializer=tf2.constant_initializer(0.0),
                            activation_fn=tf2.nn.sigmoid):
            net = slim.fully_connected(input, 1000, scope='fc1')
            net = slim.fully_connected(net, 10, activation_fn=None,
                                       scope='fc2')
            return net
def __init__(self, num_classes, per_class_kernel, feature_size,
             initial_amplitude, initial_length_scale, initial_linear_bias,
             initial_linear_slope, add_linear=False, name='vgp_kernel',
             **kwargs):
    super(RBFKernelFn, self).__init__(**kwargs)
    self._per_class_kernel = per_class_kernel
    self._initial_linear_bias = initial_linear_bias
    self._initial_linear_slope = initial_linear_slope
    self._add_linear = add_linear
    with tf.compat.v1.variable_scope(name):
        if self._per_class_kernel and num_classes > 1:
            amplitude_shape = (num_classes,)
            length_scale_shape = (num_classes, feature_size)
        else:
            amplitude_shape = ()
            length_scale_shape = (feature_size,)
        self._amplitude = self.add_variable(
            initializer=tf.constant_initializer(initial_amplitude),
            shape=amplitude_shape,
            name='amplitude')
        self._length_scale = self.add_variable(
            initializer=tf.constant_initializer(initial_length_scale),
            shape=length_scale_shape,
            name='length_scale')
        if self._add_linear:
            self._linear_bias = self.add_variable(
                initializer=tf.constant_initializer(self._initial_linear_bias),
                shape=amplitude_shape,
                name='linear_bias')
            self._linear_slope = self.add_variable(
                initializer=tf.constant_initializer(self._initial_linear_slope),
                shape=amplitude_shape,
                name='linear_slope')
def __init__(self,
             num_classes,
             num_downsample_channels,
             mask_crop_size,
             num_convs,
             coarse_mask_thr,
             gt_upsample_scale,
             batch_norm_relu=nn_ops.BatchNormRelu):
    """Initialize params to build ShapeMask coarse and fine prediction head.

    Args:
      num_classes: `int` number of mask classification categories.
      num_downsample_channels: `int` number of filters at mask head.
      mask_crop_size: feature crop size.
      num_convs: `int` number of stacked convolutions before the last
        prediction layer.
      coarse_mask_thr: the threshold for suppressing noisy coarse prediction.
      gt_upsample_scale: scale for upsampling groundtruths.
      batch_norm_relu: an operation that includes a batch normalization layer
        followed by a relu layer (optional).
    """
    self._mask_num_classes = num_classes
    self._num_downsample_channels = num_downsample_channels
    self._mask_crop_size = mask_crop_size
    self._num_convs = num_convs
    self._coarse_mask_thr = coarse_mask_thr
    self._gt_upsample_scale = gt_upsample_scale
    self._class_predict_conv = tf.keras.layers.Conv2D(
        self._mask_num_classes,
        kernel_size=(1, 1),
        # Focal loss bias initialization to have foreground 0.01 probability.
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        kernel_initializer=tf.keras.initializers.RandomNormal(
            mean=0, stddev=0.01),
        padding='same',
        name='affinity-class-predict')
    self._upsample_conv = tf.keras.layers.Conv2DTranspose(
        self._num_downsample_channels // 2,
        (self._gt_upsample_scale, self._gt_upsample_scale),
        (self._gt_upsample_scale, self._gt_upsample_scale))
    self._fine_class_conv = []
    self._fine_class_bn = []
    for i in range(self._num_convs):
        self._fine_class_conv.append(
            tf.keras.layers.Conv2D(
                self._num_downsample_channels,
                kernel_size=(3, 3),
                bias_initializer=tf.zeros_initializer(),
                kernel_initializer=tf.keras.initializers.RandomNormal(
                    stddev=0.01),
                activation=None,
                padding='same',
                name='fine-class-%d' % i))
        self._fine_class_bn.append(
            batch_norm_relu(name='fine-class-%d-bn' % i))
def set_initial_weights(self, mean1, mean2, rot1, rot2):
    if not isinstance(mean1, np.ndarray) and mean1.shape[0] == 1:  # pytype: disable=attribute-error
        raise TypeError('mean1 matrix has the wrong size (%s)' % mean1.shape)  # pytype: disable=attribute-error
    if not isinstance(mean2, np.ndarray) and mean2.shape[0] == 1:  # pytype: disable=attribute-error
        raise TypeError('mean2 matrix has the wrong size (%s)' % mean2.shape)  # pytype: disable=attribute-error
    if not isinstance(rot1, np.ndarray) and rot1.shape[1] == self.output_dims:
        raise TypeError('rot1 matrix has the wrong size (%s not %s)' %
                        (rot1.shape, self.output_dims))
    if not isinstance(rot2, np.ndarray) and rot2.shape[1] == self.output_dims:
        raise TypeError('rot2 matrix has the wrong size (%s)' % rot2.shape)
    self._mean1_init = tf.constant_initializer(mean1)
    self._mean2_init = tf.constant_initializer(mean2)
    self._rot1_init = tf.constant_initializer(rot1)
    self._rot2_init = tf.constant_initializer(rot2)
    self.set_weights([mean1, mean2, rot1, rot2])
def build(self, unused_input_shape):
    """Initialize impulse response."""
    if self.trainable:
        initializer = tf.random_normal_initializer(mean=0, stddev=1e-2)
        self._magnitudes = self.add_weight(
            name='magnitudes',
            shape=[1, self._n_filter_banks],
            dtype=tf.float32,
            initializer=initializer)
        self._decay = self.add_weight(
            name='decay',
            shape=[1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(4.0))
    self.built = True
def build_dummy_sequential_net(fc_layer_params, action_spec):
    """Build a dummy sequential network."""
    num_actions = action_spec.maximum - action_spec.minimum + 1
    logits = functools.partial(
        tf.keras.layers.Dense,
        activation=None,
        kernel_initializer=tf.random_uniform_initializer(
            minval=-0.03, maxval=0.03),
        bias_initializer=tf.constant_initializer(-0.2))
    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.keras.activations.relu,
        kernel_initializer=tf.compat.v1.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal'))
    return sequential.Sequential(
        [dense(num_units) for num_units in fc_layer_params] +
        [logits(num_actions)])
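# A minimal usage sketch for the helper above, assuming TF-Agents' spec
# utilities and a small discrete action space (the spec values here are made
# up for illustration):
import tensorflow as tf
from tf_agents.specs import tensor_spec

action_spec = tensor_spec.BoundedTensorSpec(
    shape=(), dtype=tf.int32, minimum=0, maximum=3)  # 4 actions
q_net = build_dummy_sequential_net(fc_layer_params=(100, 50),
                                   action_spec=action_spec)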
def _get_embedding_layer(self, pretrained_embed_path, oov_buckets_size,
                         vocab_size, embed_dim):
    """Get word embedding layer.

    Args:
      pretrained_embed_path: Pretrained glove embedding path.
      oov_buckets_size: Out-of-vocabularies bucket size.
      vocab_size: vocabulary size (used if pretrained_embed_path is None).
      embed_dim: the dimension of word embeddings (used if
        pretrained_embed_path is None).

    Returns:
      A tf.keras.layers.Embedding instance.
    """
    if pretrained_embed_path:
        with tf.io.gfile.GFile(pretrained_embed_path, 'rb') as f:
            floats_np = np.load(f)
        vocab_size = floats_np.shape[0]
        embed_dim = floats_np.shape[1]
        # Initialize word embeddings from the pretrained matrix; the
        # out-of-vocabulary buckets get a truncated-normal initialization.
        init_tensor = tf.constant(floats_np)
        oov_init = tf.compat.v1.truncated_normal_initializer(stddev=0.01)(
            shape=(oov_buckets_size, embed_dim), dtype=tf.float32)
        init_tensor = tf.concat([init_tensor, oov_init], axis=0)
    else:
        init_tensor = tf.compat.v1.truncated_normal_initializer(stddev=0.01)(
            shape=(vocab_size + oov_buckets_size, embed_dim),
            dtype=tf.float32)
    # init_tensor now has shape [vocab_size + oov_buckets_size, embed_dim].
    embeddings_initializer = tf.constant_initializer(init_tensor.numpy())
    return tf.keras.layers.Embedding(
        vocab_size + oov_buckets_size,
        embed_dim,
        embeddings_initializer=embeddings_initializer,
        mask_zero=True,
        name='embedding')
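# The pattern above -- baking a precomputed numpy matrix into a layer via
# tf.constant_initializer -- also works standalone. A self-contained sketch
# with a made-up 5-word vocabulary:
import numpy as np
import tensorflow as tf

pretrained = np.random.rand(5, 8).astype(np.float32)  # [vocab, embed_dim]
embedding = tf.keras.layers.Embedding(
    input_dim=5,
    output_dim=8,
    embeddings_initializer=tf.constant_initializer(pretrained))
ids = tf.constant([[0, 3, 4]])
vectors = embedding(ids)  # shape [1, 3, 8], rows copied from `pretrained`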
tf.keras.backend.set_floatx('float64')

# Build model.
num_inducing_points = 40  # inducing points sampled across the data range
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(train_data.shape[1],),
                               dtype='float32'),
    tf.keras.layers.Dense(12, kernel_initializer='zeros', use_bias=False),
    tfp.layers.VariationalGaussianProcess(
        num_inducing_points=num_inducing_points,
        kernel_provider=RBFKernelFn(),
        event_shape=[2],
        # The initializer gains one row per output dimension.
        inducing_index_points_initializer=tf.constant_initializer(
            [np.linspace(*x_range, num_inducing_points, dtype='float32'),
             np.linspace(*x_range, num_inducing_points, dtype='float32')]),
        unconstrained_observation_noise_variance_initializer=(
            tf.constant_initializer(0.1))),
])

batch_size = 264
loss = lambda y, rv_y: rv_y.variational_loss(
    y, kl_weight=np.array(batch_size) / train_data.shape[0])
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001),
              loss=loss,
              metrics=['mae', 'mse'])
# For numeric stability, set the default floating-point dtype to float64
tf.keras.backend.set_floatx('float64')

# Build model.
num_inducing_points = 40
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=[1]),
    tf.keras.layers.Dense(1, kernel_initializer='ones', use_bias=False),
    tfp.layers.VariationalGaussianProcess(
        num_inducing_points=num_inducing_points,
        kernel_provider=RBFKernelFn(),
        event_shape=[1],
        inducing_index_points_initializer=tf.constant_initializer(
            np.linspace(*x_range, num=num_inducing_points,
                        dtype=x.dtype)[..., np.newaxis]),
        unconstrained_observation_noise_variance_initializer=(
            tf.constant_initializer(np.array(0.54).astype(x.dtype))),
    ),
])

# Do inference.
batch_size = 32
loss = lambda y, rv_y: rv_y.variational_loss(
    y, kl_weight=np.array(batch_size, x.dtype) / x.shape[0])
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=loss)
model.fit(x, y, batch_size=batch_size, epochs=1000, verbose=False)

# Profit.
yhat = model(x_tst)
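# `yhat` above is not a plain tensor: the VGP layer returns a TFP
# distribution, so the predictive mean and uncertainty can be read off
# directly. A short follow-up sketch (variable names as above):
yhat_mean = yhat.mean()    # predictive mean, same leading shape as x_tst
yhat_std = yhat.stddev()   # predictive standard deviation
samples = yhat.sample(10)  # Monte Carlo draws from the posterior predictive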
def main(argv):
    del argv  # unused arg
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()
    session = tf1.Session()
    with session.as_default():
        x_train, y_train, x_test, y_test = datasets.load(session)
        n_train = x_train.shape[0]
        num_classes = int(np.amax(y_train)) + 1

        if not FLAGS.resnet:
            model = lenet5(n_train, x_train.shape[1:], num_classes)
        else:
            datagen = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=90,
                width_shift_range=0.1,
                height_shift_range=0.1,
                horizontal_flip=True)
            datagen.fit(x_train)
            model = res_net(n_train,
                            x_train.shape[1:],
                            num_classes,
                            batchnorm=FLAGS.batchnorm,
                            variational='hybrid' if FLAGS.hybrid else 'full')

            def schedule_fn(epoch):
                """Learning rate schedule function."""
                rate = FLAGS.learning_rate
                if epoch > 180:
                    rate *= 0.5e-3
                elif epoch > 160:
                    rate *= 1e-3
                elif epoch > 120:
                    rate *= 1e-2
                elif epoch > 80:
                    rate *= 1e-1
                return float(rate)

            lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule_fn)

        for l in model.layers:
            l.kl_cost_weight = l.add_weight(
                name='kl_cost_weight',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)
            l.kl_cost_bias = l.add_variable(
                name='kl_cost_bias',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)

        [negative_log_likelihood, accuracy, log_likelihood, kl,
         elbo] = get_losses_and_metrics(model, n_train)
        metrics = [elbo, log_likelihood, kl, accuracy]

        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=FLAGS.output_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)
        if FLAGS.resnet:
            callbacks = [tensorboard, lr_callback]
        else:
            callbacks = [tensorboard]

        if not FLAGS.resnet or not FLAGS.data_augmentation:

            def fit_fn(model,
                       steps,
                       initial_epoch=0,
                       with_lr_schedule=FLAGS.resnet):
                return model.fit(
                    x=x_train,
                    y=y_train,
                    batch_size=FLAGS.batch_size,
                    epochs=initial_epoch +
                    (FLAGS.batch_size * steps) // n_train,
                    initial_epoch=initial_epoch,
                    validation_data=(x_test, y_test),
                    validation_freq=(
                        (FLAGS.validation_freq * FLAGS.batch_size) // n_train),
                    verbose=1,
                    callbacks=callbacks if with_lr_schedule else [tensorboard])
        else:

            def fit_fn(model,
                       steps,
                       initial_epoch=0,
                       with_lr_schedule=FLAGS.resnet):
                return model.fit_generator(
                    datagen.flow(x_train, y_train,
                                 batch_size=FLAGS.batch_size),
                    epochs=initial_epoch +
                    (FLAGS.batch_size * steps) // n_train,
                    initial_epoch=initial_epoch,
                    steps_per_epoch=n_train // FLAGS.batch_size,
                    validation_data=(x_test, y_test),
                    validation_freq=max(
                        (FLAGS.validation_freq * FLAGS.batch_size) // n_train,
                        1),
                    verbose=1,
                    callbacks=callbacks if with_lr_schedule else [tensorboard])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
            loss=negative_log_likelihood,
            metrics=metrics)
        session.run(tf1.initialize_all_variables())

        train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
        fit_fn(model, FLAGS.training_steps)

        labels = tf.keras.layers.Input(shape=y_train.shape[1:])
        ll = tf.keras.backend.function([model.input, labels], [
            model.output.distribution.log_prob(tf.squeeze(labels)),
            model.output.distribution.logits
        ])
        base_metrics = [
            ensemble_metrics(x_train, y_train, model, ll),
            ensemble_metrics(x_test, y_test, model, ll)
        ]
        model_dir = os.path.join(FLAGS.output_dir, 'models')
        tf.io.gfile.makedirs(model_dir)
        base_model_filename = os.path.join(model_dir, 'base_model.weights')
        model.save_weights(base_model_filename)

        # Train base model further for comparison.
        fit_fn(model,
               FLAGS.n_auxiliary_variables *
               FLAGS.auxiliary_sampling_frequency * FLAGS.ensemble_size,
               initial_epoch=train_epochs)
        overtrained_metrics = [
            ensemble_metrics(x_train, y_train, model, ll),
            ensemble_metrics(x_test, y_test, model, ll)
        ]

        # Perform refined VI.
        sample_op = []
        for l in model.layers:
            if isinstance(l, tfp.layers.DenseLocalReparameterization) or (
                    isinstance(l, tfp.layers.Convolution2DFlipout)):
                weight_op, weight_cost = sample_auxiliary_op(
                    l.kernel_prior.distribution,
                    l.kernel_posterior.distribution,
                    FLAGS.auxiliary_variance_ratio)
                sample_op.append(weight_op)
                sample_op.append(l.kl_cost_weight.assign_add(weight_cost))
                # Fix the variance of the prior.
                session.run(l.kernel_prior.distribution.istrainable.assign(0.))
                if hasattr(l.bias_prior, 'distribution'):
                    bias_op, bias_cost = sample_auxiliary_op(
                        l.bias_prior.distribution,
                        l.bias_posterior.distribution,
                        FLAGS.auxiliary_variance_ratio)
                    sample_op.append(bias_op)
                    sample_op.append(l.kl_cost_bias.assign_add(bias_cost))
                    # Fix the variance of the prior.
                    session.run(
                        l.bias_prior.distribution.istrainable.assign(0.))

        ensemble_filenames = []
        for i in range(FLAGS.ensemble_size):
            model.load_weights(base_model_filename)
            for j in range(FLAGS.n_auxiliary_variables):
                session.run(sample_op)
                model.compile(
                    optimizer=tf.keras.optimizers.Adam(
                        # The learning rate is proportional to the scale of
                        # the prior.
                        lr=float(
                            FLAGS.learning_rate_for_sampling *
                            np.sqrt(1. - FLAGS.auxiliary_variance_ratio)**j)),
                    loss=negative_log_likelihood,
                    metrics=metrics)
                fit_fn(model,
                       FLAGS.auxiliary_sampling_frequency,
                       initial_epoch=train_epochs,
                       with_lr_schedule=False)
            ensemble_filename = os.path.join(
                model_dir, 'ensemble_component_' + str(i) + '.weights')
            ensemble_filenames.append(ensemble_filename)
            model.save_weights(ensemble_filename)

        auxiliary_metrics = [
            ensemble_metrics(
                x_train, y_train, model, ll,
                weight_files=ensemble_filenames),
            ensemble_metrics(
                x_test, y_test, model, ll, weight_files=ensemble_filenames)
        ]

        for metrics, name in [(base_metrics, 'Base model'),
                              (overtrained_metrics, 'Overtrained model'),
                              (auxiliary_metrics, 'Auxiliary sampling')]:
            logging.info(name)
            for metrics_dict, split in [(metrics[0], 'Training'),
                                        (metrics[1], 'Testing')]:
                logging.info(split)
                for metric_name in metrics_dict:
                    logging.info('%s: %s', metric_name,
                                 metrics_dict[metric_name])
def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    model_name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    """Instantiates the EfficientNetV2 architecture using given scaling
    coefficients.

    Args:
      width_coefficient: float, scaling coefficient for network width.
      depth_coefficient: float, scaling coefficient for network depth.
      default_size: integer, default input image size.
      dropout_rate: float, dropout rate before final classifier layer.
      drop_connect_rate: float, dropout rate at skip connections.
      depth_divisor: integer, a unit of network width.
      min_depth: integer, minimum number of filters.
      bn_momentum: float, momentum parameter for Batch Normalization layers.
      activation: activation function.
      blocks_args: list of dicts, parameters to construct block modules.
      model_name: string, model name.
      include_top: whether to include the fully-connected layer at the top of
        the network.
      weights: one of `None` (random initialization), `"imagenet"`
        (pre-training on ImageNet), or the path to the weights file to be
        loaded.
      input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or
        numpy array to use as image input for the model.
      input_shape: optional shape tuple, only to be specified if `include_top`
        is False. It should have exactly 3 input channels.
      pooling: optional pooling mode for feature extraction when `include_top`
        is `False`.
        - `None` means that the output of the model will be the 4D tensor
          output of the last convolutional layer.
        - `"avg"` means that global average pooling will be applied to the
          output of the last convolutional layer, and thus the output of the
          model will be a 2D tensor.
        - `"max"` means that global max pooling will be applied.
      classes: optional number of classes to classify images into, only to be
        specified if `include_top` is True, and if no `weights` argument is
        specified.
      classifier_activation: a string or callable. The activation function to
        use on the "top" layer. Ignored unless `include_top=True`. Set
        `classifier_activation=None` to return the logits of the "top" layer.
      include_preprocessing: Boolean, whether to include the preprocessing
        layer (`Rescaling`) at the bottom of the network. Defaults to `True`.

    Returns:
      A `keras.Model` instance.

    Raises:
      ValueError: in case of invalid argument for `weights`, or invalid input
        shape.
      ValueError: if `classifier_activation` is not `"softmax"` or `None` when
        using a pretrained top layer.
    """
    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[model_name]

    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError("The `weights` argument should be either "
                         "`None` (random initialization), `imagenet` "
                         "(pre-training on ImageNet), "
                         "or the path to the weights file to be loaded. "
                         f"Received: weights={weights}")

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000. "
            f"Received: classes={classes}")

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it.
        num_channels = input_shape[bn_axis - 1]
        if model_name.split("-")[-1].startswith("b") and num_channels == 3:
            x = layers.Rescaling(scale=1. / 255)(x)
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)
        else:
            x = layers.Rescaling(scale=1. / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=stem_filters,
        kernel_size=3,
        strides=2,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        use_bias=False,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="stem_bn",
    )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    blocks_args = copy.deepcopy(blocks_args)
    b = 0
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for (i, args) in enumerate(blocks_args):
        assert args["num_repeat"] > 0

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(
            repeats=args.pop("num_repeat"),
            depth_coefficient=depth_coefficient)
        for j in range(repeats):
            # The first block needs to take care of stride and filter size
            # increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name="block{}{}_".format(i + 1, chr(j + 97)),
                **args,
            )(x)
            b += 1

    # Build top
    top_filters = round_filters(
        filters=1280,
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor)
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format="channels_last",
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes,
            activation=classifier_activation,
            kernel_initializer=DENSE_KERNEL_INITIALIZER,
            bias_initializer=tf.constant_initializer(0),
            name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suffix
        weights_path = data_utils.get_file(
            file_name,
            BASE_WEIGHTS_PATH + file_name,
            cache_subdir="models",
            file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
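# `EfficientNetV2` above is the generic builder; the public Keras API wraps it
# in fixed configurations. A usage sketch with random weights (no checkpoint
# download; the class count here is illustrative):
import tensorflow as tf

model = tf.keras.applications.EfficientNetV2B0(
    weights=None,               # random init instead of the ImageNet weights
    input_shape=(224, 224, 3),
    classes=10)                 # a custom head is allowed when weights=None
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")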
def main(argv):
    del argv  # unused arg
    np.random.seed(FLAGS.seed)
    tf.random.set_seed(FLAGS.seed)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf1.disable_v2_behavior()
    session = tf1.Session()
    with session.as_default():
        x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
        n_train = x_train.shape[0]

        model = multilayer_perceptron(
            n_train, x_train.shape[1:],
            np.std(y_train) + tf.keras.backend.epsilon())
        for l in model.layers:
            l.kl_cost_weight = l.add_weight(
                name='kl_cost_weight',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)
            l.kl_cost_bias = l.add_variable(
                name='kl_cost_bias',
                shape=(),
                initializer=tf.constant_initializer(0.),
                trainable=False)

        [negative_log_likelihood, mse, log_likelihood, kl,
         elbo] = get_losses_and_metrics(model, n_train)
        metrics = [elbo, log_likelihood, kl, mse]

        tensorboard = tf1.keras.callbacks.TensorBoard(
            log_dir=FLAGS.output_dir,
            update_freq=FLAGS.batch_size * FLAGS.validation_freq)

        def fit_fn(model, steps, initial_epoch):
            return model.fit(
                x=x_train,
                y=y_train,
                batch_size=FLAGS.batch_size,
                epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
                initial_epoch=initial_epoch,
                validation_data=(x_test, y_test),
                validation_freq=max(
                    (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
                verbose=1,
                callbacks=[tensorboard])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
            loss=negative_log_likelihood,
            metrics=metrics)
        session.run(tf1.initialize_all_variables())

        train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
        fit_fn(model, FLAGS.training_steps, initial_epoch=0)

        labels = tf.keras.layers.Input(shape=y_train.shape[1:])
        ll = tf.keras.backend.function([model.input, labels], [
            model.output.distribution.log_prob(labels),
            model.output.distribution.loc - labels
        ])
        base_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll),
            utils.ensemble_metrics(x_test, y_test, model, ll),
        ]
        model_dir = os.path.join(FLAGS.output_dir, 'models')
        tf.io.gfile.makedirs(model_dir)
        base_model_filename = os.path.join(model_dir, 'base_model.weights')
        model.save_weights(base_model_filename)

        # Train base model further for comparison.
        fit_fn(model,
               FLAGS.n_auxiliary_variables *
               FLAGS.auxiliary_sampling_frequency * FLAGS.ensemble_size,
               initial_epoch=train_epochs)
        overtrained_metrics = [
            utils.ensemble_metrics(x_train, y_train, model, ll),
            utils.ensemble_metrics(x_test, y_test, model, ll),
        ]

        # Perform refined VI.
        sample_op = []
        for l in model.layers:
            if hasattr(l, 'kernel_prior'):
                weight_op, weight_cost = sample_auxiliary_op(
                    l.kernel_prior.distribution,
                    l.kernel_posterior.distribution,
                    FLAGS.auxiliary_variance_ratio)
                sample_op.append(weight_op)
                sample_op.append(l.kl_cost_weight.assign_add(weight_cost))
                # Fix the variance of the prior.
                session.run(l.kernel_prior.distribution.istrainable.assign(0.))
                if hasattr(l.bias_prior, 'distribution'):
                    bias_op, bias_cost = sample_auxiliary_op(
                        l.bias_prior.distribution,
                        l.bias_posterior.distribution,
                        FLAGS.auxiliary_variance_ratio)
                    sample_op.append(bias_op)
                    sample_op.append(l.kl_cost_bias.assign_add(bias_cost))
                    # Fix the variance of the prior.
                    session.run(
                        l.bias_prior.distribution.istrainable.assign(0.))

        ensemble_filenames = []
        for i in range(FLAGS.ensemble_size):
            model.load_weights(base_model_filename)
            for j in range(FLAGS.n_auxiliary_variables):
                session.run(sample_op)
                model.compile(
                    optimizer=tf.keras.optimizers.Adam(
                        # The learning rate is proportional to the scale of
                        # the prior.
                        lr=float(
                            FLAGS.learning_rate_for_sampling *
                            np.sqrt(1. - FLAGS.auxiliary_variance_ratio)**j)),
                    loss=negative_log_likelihood,
                    metrics=metrics)
                fit_fn(model,
                       FLAGS.auxiliary_sampling_frequency,
                       initial_epoch=train_epochs)
            ensemble_filename = os.path.join(
                model_dir, 'ensemble_component_' + str(i) + '.weights')
            ensemble_filenames.append(ensemble_filename)
            model.save_weights(ensemble_filename)

        auxiliary_metrics = [
            utils.ensemble_metrics(
                x_train, y_train, model, ll,
                weight_files=ensemble_filenames),
            utils.ensemble_metrics(
                x_test, y_test, model, ll, weight_files=ensemble_filenames),
        ]

        for metrics, name in [(base_metrics, 'Base model'),
                              (overtrained_metrics, 'Overtrained model'),
                              (auxiliary_metrics, 'Auxiliary sampling')]:
            logging.info(name)
            for metrics_dict, split in [(metrics[0], 'train'),
                                        (metrics[1], 'test')]:
                logging.info(split)
                for metric_name in metrics_dict:
                    logging.info('%s: %s', metric_name,
                                 metrics_dict[metric_name])
def __call__(self, crop_features, detection_priors, inst_classes,
             is_training=None):
    """Generate instance masks from FPN features and detection priors.

    This corresponds to Figs. 5-6 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      crop_features: a float Tensor of shape [batch_size * num_instances,
        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
        instance feature crop.
      detection_priors: a float Tensor of shape [batch_size * num_instances,
        mask_crop_size, mask_crop_size, 1]. This is the detection prior for
        the instance.
      inst_classes: an int Tensor of shape [batch_size, num_instances] of
        instance classes.
      is_training: a bool indicating whether in training mode.

    Returns:
      mask_outputs: instance mask prediction as a float Tensor of shape
        [batch_size * num_instances, mask_size, mask_size, num_classes].
    """
    # Embed the anchor map into some feature space for anchor conditioning.
    detection_prior_features = tf.keras.layers.Conv2D(
        self._num_downsample_channels,
        kernel_size=(1, 1),
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(
            mean=0., stddev=0.01),
        padding='same',
        name='anchor-conv')(detection_priors)

    prior_conditioned_features = crop_features + detection_prior_features
    coarse_output_features = self.coarsemask_decoder_net(
        prior_conditioned_features, is_training)

    coarse_mask_classes = tf.keras.layers.Conv2D(
        self._mask_num_classes,
        kernel_size=(1, 1),
        # Focal loss bias initialization to have foreground 0.01 probability.
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        kernel_initializer=tf.keras.initializers.RandomNormal(
            mean=0, stddev=0.01),
        padding='same',
        name='class-predict')(coarse_output_features)

    if self._use_category_for_mask:
        inst_classes = tf.cast(tf.reshape(inst_classes, [-1]), tf.int32)
        coarse_mask_classes_t = tf.transpose(
            a=coarse_mask_classes, perm=(0, 3, 1, 2))
        # pylint: disable=g-long-lambda
        coarse_mask_logits = tf.cond(
            pred=tf.size(input=inst_classes) > 0,
            true_fn=lambda: tf.gather_nd(
                coarse_mask_classes_t,
                tf.stack(
                    [tf.range(tf.size(input=inst_classes)), inst_classes - 1],
                    axis=1)),
            false_fn=lambda: coarse_mask_classes_t[:, 0, :, :])
        # pylint: enable=g-long-lambda
        coarse_mask_logits = tf.expand_dims(coarse_mask_logits, -1)
    else:
        coarse_mask_logits = coarse_mask_classes

    coarse_class_probs = tf.nn.sigmoid(coarse_mask_logits)
    class_probs = tf.cast(coarse_class_probs,
                          prior_conditioned_features.dtype)
    return coarse_mask_classes, class_probs, prior_conditioned_features
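# How the tf.gather_nd above selects one class channel per instance: a tiny
# standalone example with 2 instances, 3 classes, and 2x2 masks. The
# transpose puts classes on axis 1 so gather_nd can index [instance, class]
# pairs (values here are illustrative):
import tensorflow as tf

logits = tf.reshape(tf.range(2 * 2 * 2 * 3, dtype=tf.float32), [2, 2, 2, 3])
classes = tf.constant([2, 1], tf.int32)              # 1-based class ids
logits_t = tf.transpose(logits, perm=(0, 3, 1, 2))   # [2, 3, 2, 2]
picked = tf.gather_nd(
    logits_t,
    tf.stack([tf.range(2), classes - 1], axis=1))    # [2, 2, 2]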
collect_actor.run()
dqn_learner.run(iterations=1)
if eval_interval and dqn_learner.train_step_numpy % eval_interval == 0:
    logging.info('Evaluating.')
    eval_actor.run_and_log()

rb_observer.close()
reverb_server.stop()


logits = functools.partial(
    tf.keras.layers.Dense,
    activation=None,
    kernel_initializer=tf.random_uniform_initializer(
        minval=-0.03, maxval=0.03),
    bias_initializer=tf.constant_initializer(-0.2))

dense = functools.partial(
    tf.keras.layers.Dense,
    activation=tf.keras.activations.relu,
    kernel_initializer=tf.compat.v1.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal'))


def main(_):
    logging.set_verbosity(logging.INFO)
    tf.enable_v2_behavior()
    gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_bindings)
    train_eval(FLAGS.root_dir,
def std_layers():
    # TODO(b/179510447): align these parameters with Schulman 17.
    std_bias_initializer_value = np.log(np.exp(0.35) - 1)
    return bias_layer.BiasLayer(
        bias_initializer=tf.constant_initializer(
            value=std_bias_initializer_value))
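# The constant above is the inverse-softplus of 0.35: if the bias produced by
# this layer is later passed through a softplus to get a positive standard
# deviation (a common pattern in policy networks; assumed here, not shown in
# the snippet), the std starts out at exactly 0.35. A quick check:
import numpy as np
import tensorflow as tf

bias0 = np.log(np.exp(0.35) - 1)        # approx. -0.87
print(tf.nn.softplus(bias0).numpy())    # approx. 0.35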