# Exemplo n.º 1
def define_models_GAN(rand_dim, data_dim, base_n_count, type=None):
    """Build the generator, discriminator, and combined models for a GAN.

    Args:
        rand_dim: Dimensionality of the generator's random-noise input.
        data_dim: Dimensionality of the (real or generated) data samples.
        base_n_count: Base unit count forwarded to the network builders.
        type: If 'Wasserstein', use the critic network as the discriminator.

    Returns:
        Tuple of (generator_model, discriminator_model, combined_model).
    """
    noise_input = layers.Input(shape=(rand_dim, ))
    fake_sample = generator_network(noise_input, data_dim, base_n_count)

    sample_input = layers.Input(shape=(data_dim, ))

    # A Wasserstein GAN uses a critic (unbounded score) instead of a
    # sigmoid discriminator.
    if type == 'Wasserstein':
        disc_output = critic_network(sample_input, data_dim, base_n_count)
    else:
        disc_output = discriminator_network(sample_input, data_dim,
                                            base_n_count)

    generator_model = models.Model(inputs=[noise_input],
                                   outputs=[fake_sample],
                                   name='generator')
    discriminator_model = models.Model(inputs=[sample_input],
                                       outputs=[disc_output],
                                       name='discriminator')

    # Stacked model: noise -> generator -> discriminator, used to train the
    # generator (discriminator weights are frozen by the caller).
    combined_model = models.Model(
        inputs=[noise_input],
        outputs=[discriminator_model(generator_model(noise_input))],
        name='combined')

    return generator_model, discriminator_model, combined_model
# Exemplo n.º 2
def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text.

    Args:
        params: Dict of hyperparameters; reads 'fix_len', 'vocab_size',
            'emb_size', 'hidden_lstm_size', 'dropout_rate_lstm',
            'dropout_rate', 'random_seed', 'variational', 'n_class_in',
            'reg_weight' and (when variational) 'n_train'.
        training_dr_lstm: If True, LSTM dropout stays active at inference
            time (MC dropout).
        training_dr_ll: If True, last-layer dropout stays active at
            inference time.

    Returns:
        A Keras Model mapping padded token-id sequences to class
        probabilities.
    """
    # BUG FIX: `(params['fix_len'])` is just a parenthesized int, not a
    # 1-tuple; `layers.Input` expects a shape tuple.
    input_shape = (params['fix_len'],)
    seq_input = layers.Input(shape=input_shape)
    # vocab+1 because of padding
    seq_emb = layers.Embedding(params['vocab_size'] + 1,
                               params['emb_size'],
                               input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(params['hidden_lstm_size'],
                           dropout=params['dropout_rate_lstm'])(
                               seq_emb, training=training_dr_lstm)
    out = layers.Dropout(rate=params['dropout_rate'],
                         seed=params['random_seed'])(lstm_out,
                                                     training=training_dr_ll)
    if params['variational']:
        # scale kl loss by number of training examples.
        # larger training dataset depends less on prior
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(out)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(out)
    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
def get_model(cfg, encoder_inputs, encoder_outputs):
    """Assemble the full seq2seq model from encoder pieces plus a new decoder.

    Args:
        cfg: Config with num_input_tokens, num_output_tokens and latent_dim.
        encoder_inputs: Input tensor of the already-built encoder.
        encoder_outputs: Encoder state tensor used to seed the decoder GRU.

    Returns:
        A Keras Model mapping [encoder_inputs, decoder_inputs] to the
        decoder's softmax outputs.
    """
    # Decoder input carries the shifted target sequence (teacher forcing).
    decoder_inputs = layers.Input(shape=(None, ), name='Decoder-Input')

    embedded = layers.Embedding(cfg.num_input_tokens,
                                cfg.latent_dim,
                                name='Decoder-Embedding',
                                mask_zero=False)(decoder_inputs)
    normed = layers.BatchNormalization(name='Decoder-Batchnorm-1')(embedded)

    # The encoder's final state initialises the decoder GRU.
    gru = layers.GRU(cfg.latent_dim,
                     return_state=True,
                     return_sequences=True,
                     name='Decoder-GRU')
    gru_seq, _ = gru(normed, initial_state=encoder_outputs)

    gru_seq_bn = layers.BatchNormalization(name='Decoder-Batchnorm-2')(gru_seq)

    decoder_dense = layers.Dense(cfg.num_output_tokens,
                                 activation='softmax',
                                 name='Final-Output-Dense')
    decoder_outputs = decoder_dense(gru_seq_bn)

    return models.Model([encoder_inputs, decoder_inputs], decoder_outputs)
# Exemplo n.º 4
  def encoder_model(architecture='inception_v3', pre_trained_dataset='imagenet',
                    downsample_factor=8):
    """Returns encoder model.

    Builds the representation encoder: an InceptionV3 backbone pre-trained
    on `pre_trained_dataset`, followed by average pooling that downsamples
    the feature map by `downsample_factor`.

    NOTE(review): relies on `input_shape` from the enclosing scope for the
    input spatial dimensions — confirm it is defined before calling.

    Args:
      architecture: Base architecture of encoder model (e.g. 'inception_v3')
      pre_trained_dataset: The dataset used to pre-train the encoder model
      downsample_factor: Downsample factor for the outputs

    Raises:
      NameError: Returns name errors if architecture is not 'inception_v3'
    """
    tf_input = layers.Input(shape=(input_shape[0], input_shape[1], 3))
    # Only InceptionV3 is supported; reject anything else up front.
    if architecture != 'inception_v3':
      raise NameError('Invalid architecture')
    backbone = applications.inception_v3.InceptionV3(
        input_tensor=tf_input, weights=pre_trained_dataset, include_top=False)
    pool_size = (downsample_factor, downsample_factor)
    output_pooled = layers.AveragePooling2D(
        pool_size, strides=pool_size)(backbone.output)
    return models.Model(backbone.input, output_pooled)
# Exemplo n.º 5
def define_models_CGAN(rand_dim, data_dim, label_dim, base_n_count, type=None):
    """Build generator, discriminator, and combined models for a conditional GAN.

    Args:
        rand_dim: Dimensionality of the generator's random-noise input.
        data_dim: Dimensionality of the data samples (without labels).
        label_dim: Dimensionality of the conditioning label vector.
        base_n_count: Base unit count forwarded to the network builders.
        type: If 'Wasserstein', use the critic network as the discriminator.

    Returns:
        Tuple of (generator_model, discriminator_model, combined_model).
    """
    noise_input = layers.Input(shape=(rand_dim, ))
    label_input = layers.Input(shape=(label_dim, ))
    fake_sample = generator_network_w_label(noise_input, label_input, data_dim,
                                            label_dim, base_n_count)

    # The discriminator consumes a sample concatenated with its label.
    sample_input = layers.Input(shape=(data_dim + label_dim, ))

    # A Wasserstein GAN uses a critic (unbounded score) instead of a
    # sigmoid discriminator.
    if type == 'Wasserstein':
        disc_output = critic_network(sample_input, data_dim + label_dim,
                                     base_n_count)
    else:
        disc_output = discriminator_network(sample_input,
                                            data_dim + label_dim,
                                            base_n_count)

    generator_model = models.Model(inputs=[noise_input, label_input],
                                   outputs=[fake_sample],
                                   name='generator')
    discriminator_model = models.Model(inputs=[sample_input],
                                       outputs=[disc_output],
                                       name='discriminator')

    # Stack generator and discriminator to train the generator end to end.
    combined_model = models.Model(
        inputs=[noise_input, label_input],
        outputs=[
            discriminator_model(generator_model([noise_input, label_input]))
        ],
        name='combined')

    return generator_model, discriminator_model, combined_model
# Exemplo n.º 6
def get_decoder_model(model):
    """Rebuild a standalone decoder for step-wise inference.

    Reuses the trained decoder layers of `model`, but exposes the GRU
    hidden state as an explicit Input/output pair so the caller can carry
    it across decoding steps.

    Args:
        model: Trained seq2seq Keras model containing the decoder layers.

    Returns:
        Keras Model mapping [decoder token input, hidden state] to
        [token probabilities, updated hidden state].
    """
    embedding_layer = model.get_layer('Decoder-Embedding')
    latent_dim = embedding_layer.output_shape[-1]

    decoder_inputs = model.get_layer('Decoder-Input').input
    embedded = embedding_layer(decoder_inputs)
    normed = model.get_layer('Decoder-Batchnorm-1')(embedded)

    # Hidden state becomes an explicit input at inference time.
    state_input = layers.Input(shape=(latent_dim, ), name='hidden_state_input')

    gru_out, state_out = model.get_layer('Decoder-GRU')([normed, state_input])

    dense_out = model.get_layer('Final-Output-Dense')(
        model.get_layer('Decoder-Batchnorm-2')(gru_out))

    return models.Model([decoder_inputs, state_input], [dense_out, state_out])
def get_encoder_model(cfg):
    """Build the encoder: padded token ids -> final GRU hidden state.

    Args:
        cfg: Config with len_input_seq, num_input_tokens and latent_dim.

    Returns:
        Tuple (encoder_model, encoder_inputs, encoder_outputs), where
        encoder_outputs is the encoder model applied to its own inputs.
    """
    encoder_inputs = layers.Input(shape=(cfg.len_input_seq, ),
                                  name='Encoder-Input')

    embedded = layers.Embedding(cfg.num_input_tokens,
                                cfg.latent_dim,
                                name='Encoder-Embedding',
                                mask_zero=False)(encoder_inputs)
    embedded = layers.BatchNormalization(name='Encoder-Batchnorm-1')(embedded)

    # Only the final hidden state is kept; the sequence output is discarded.
    _, state_h = layers.GRU(cfg.latent_dim,
                            return_state=True,
                            name='Encoder-Last-GRU')(embedded)

    encoder_model = models.Model(inputs=encoder_inputs,
                                 outputs=state_h,
                                 name='Encoder-Model')

    encoder_outputs = encoder_model(encoder_inputs)

    return encoder_model, encoder_inputs, encoder_outputs
# Exemplo n.º 8
def ResNet50(method, num_classes, num_updates, dropout_rate):
    """Instantiates the ResNet50 architecture with optional uncertainty layers.

    Args:
      method: `str`, method for accounting for uncertainty. Must be one of
        ['vanilla', 'll_dropout', 'll_svi', 'dropout', 'svi', 'dropout_nofirst']
      num_classes: `int` number of classes for image classification.
      num_updates: integer, total steps in an epoch (for weighting the loss)
      dropout_rate: Dropout rate for ll_dropout, dropout methods.

    Returns:
      A Keras model instance.

    Raises:
      ValueError: if `dropout_rate` is inconsistent with `method` (nonzero
        rate without a dropout method, or vice versa).

    pylint: disable=invalid-name
    """

    # Determine proper input shape
    if backend.image_data_format() == 'channels_first':
        input_shape = (3, 224, 224)
        bn_axis = 1
    else:
        input_shape = (224, 224, 3)
        bn_axis = 3

    # XOR check: exactly one of "dropout method chosen" / "dropout_rate > 0"
    # being true is an inconsistent configuration.
    if (method in ['dropout', 'll_dropout', 'dropout_nofirst'
                   ]) != (dropout_rate > 0.):
        raise ValueError(
            'Dropout rate should be nonzero iff a dropout method is used.'
            'Method is {}, dropout is {}.'.format(method, dropout_rate))

    use_variational_layers = method == 'svi'
    # Hidden-layer dropout applies only for the full-network dropout methods;
    # 'll_dropout' keeps hidden layers deterministic.
    hidden_layer_dropout = dropout_rate if method in [
        'dropout', 'dropout_nofirst'
    ] else 0.

    img_input = layers.Input(shape=input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    # 'dropout_nofirst' skips dropout before the first conv; training=True
    # keeps dropout active at inference (MC dropout).
    if (dropout_rate > 0.) and (method != 'dropout_nofirst'):
        x = layers.Dropout(hidden_layer_dropout)(x, training=True)
    x = layers.Conv2D(64, (7, 7),
                      use_bias=False,
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Pre-bind the shared block arguments so the stage definitions below
    # stay readable.
    conv_block = functools.partial(
        conv_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)
    identity_block = functools.partial(
        identity_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)

    # Standard ResNet50 stage layout: 3 + 4 + 6 + 3 residual blocks.
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

    # Last-layer dropout (applies for both 'dropout' and 'll_dropout'),
    # again kept active at inference time.
    if dropout_rate > 0.:
        x = layers.Dropout(dropout_rate)(x, training=True)

    if method in ['ll_svi', 'svi']:

        # Variational last layer; KL term is weighted by 1/num_updates so the
        # full epoch contributes one KL copy.
        x = tfpl.dense_variational_v2.DenseVariational(
            units=num_classes,
            make_posterior_fn=posterior_mean_field,
            make_prior_fn=functools.partial(prior_trainable,
                                            num_updates=num_updates),
            use_bias=True,
            kl_weight=1. / num_updates,
            kl_use_exact=True,
            name='fc1000')(x)
    else:
        x = layers.Dense(num_classes,
                         kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                         bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                         name='fc1000')(x)

    # Create model.
    return models.Model(img_input, x, name='resnet50')
def main(_):
    """Evaluate (optionally ensembled) RNN text classifiers.

    Loads checkpoints from FLAGS.model_dir/FLAGS.method, predicts on
    in-distribution, skewed, and OOD test sets (with optional MC sampling,
    ensembling, and temperature scaling), prints accuracy and AUCs, and
    pickles the prediction dict next to the checkpoints.
    """

    if FLAGS.is_tempscale:
        tf.enable_v2_behavior()

    # Hyperparameters from flags; n_class_in / n_train are filled in later
    # from each checkpoint's params.json.
    params = {
        'num_epochs': FLAGS.num_epochs,
        'fix_len': FLAGS.fix_len,
        'batch_size': FLAGS.batch_size,
        'n_class': FLAGS.n_class,
        'emb_size': FLAGS.emb_size,
        'vocab_size': FLAGS.vocab_size,
        'hidden_lstm_size': FLAGS.hidden_lstm_size,
        'dropout_rate': FLAGS.dropout_rate,
        'dropout_rate_lstm': FLAGS.dropout_rate_lstm,
        'learning_rate': FLAGS.learning_rate,
        'reg_weight': FLAGS.reg_weight,
        'tr_out_dir': FLAGS.tr_out_dir,
        'data_pkl_file': FLAGS.data_pkl_file,
        'master': FLAGS.master,
        'clip_norm': FLAGS.clip_norm,
        'random_seed': FLAGS.random_seed,
        'variational': FLAGS.variational,
        'n_class_in': None,
        'n_train': None,
    }

    # load in-dist. and skewed in-dist. datasets
    data = classifier.load_np_dataset(params['data_pkl_file'])

    # load OOD dataset
    n_ood = 5600
    test_lm1b_x_pad, _ = load_ood_dataset(n_ood, params['fix_len'], data.vocab,
                                          params['vocab_size'])

    # list of ckpt dir
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.method)
    ckpt_dirs = tf.io.gfile.listdir(model_dir)

    # how many replicates for ensemble
    if FLAGS.is_ensemble:
        assert len(ckpt_dirs) > 1
        n_ensemble = len(ckpt_dirs)
        # NOTE(review): unreachable — the assert above already requires > 1.
        if n_ensemble == 0:
            logging.fatal('no model ckpt')
    else:
        n_ensemble = 1

    pred = {}  # dict for final prediction score
    # dict for saving pred from different models
    pred_accum = {'in': [], 'skew': [], 'ood': []}

    for i in range(n_ensemble):

        ckpt_dir = os.path.join(model_dir, ckpt_dirs[i], 'model')
        if not tf.io.gfile.isdir(ckpt_dir):
            continue
        print('ckpt_dir={}'.format(ckpt_dir))

        # load params saved with this checkpoint, overriding the flag values
        with tf.gfile.GFile(os.path.join(ckpt_dir, 'params.json'),
                            mode='rb') as f:
            params_json = yaml.safe_load(f)
            params.update(params_json)
            params['master'] = ''
        print('params after load={}'.format(params))

        tf.reset_default_graph()
        # create model; dropout stays active at predict time (MC dropout)
        # only when the corresponding rate is nonzero
        model = classifier.rnn_model(
            params,
            training_dr_lstm=params['dropout_rate_lstm'] != 0.0,
            training_dr_ll=params['dropout_rate'] != 0.0)

        # load model
        model.load_weights(ckpt_dir + '/model.ckpt')

        # predict
        if FLAGS.method in ['ll-svi', 'dropout', 'll-dropout']:
            # need to run multiple times and get mean prediction
            assert FLAGS.n_pred_sample > 1
        else:
            # deterministic model: a single forward pass suffices
            FLAGS.n_pred_sample = 1

        pred_k = {'in': [], 'skew': [], 'ood': []}
        for _ in range(FLAGS.n_pred_sample):
            pred_tr_in = model.predict(data.in_sample_examples)
            acc_tr_in = np.mean(
                data.in_sample_labels == np.argmax(pred_tr_in, axis=1))

            pred_test_in = model.predict(data.test_in_sample_examples)
            acc_test_in = np.mean(
                data.test_in_sample_labels == np.argmax(pred_test_in, axis=1))
            print('in-dist. acc_tr={}, acc_test={}'.format(
                acc_tr_in, acc_test_in))

            pred_test_skew = model.predict(data.test_oos_examples)
            pred_test_ood = model.predict(test_lm1b_x_pad)

            if FLAGS.is_tempscale:
                # temperature scaling
                # logits for temp scaling
                last_layer_model = models.Model(
                    inputs=model.input,
                    outputs=model.get_layer('last_layer').output)
                logits = last_layer_model.predict(data.dev_in_sample_examples)
                # fit the temperature on the dev set, then rescale all splits
                opt_temp = calibration_lib.find_scaling_temperature(
                    data.dev_in_sample_labels, logits, temp_range=(1e-5, 1e5))
                pred_test_in = calibration_lib.apply_temperature_scaling(
                    opt_temp, pred_test_in)
                pred_test_skew = calibration_lib.apply_temperature_scaling(
                    opt_temp, pred_test_skew)
                pred_test_ood = calibration_lib.apply_temperature_scaling(
                    opt_temp, pred_test_ood)

            # save in a list
            pred_k['in'].append(pred_test_in)
            pred_k['skew'].append(pred_test_skew)
            pred_k['ood'].append(pred_test_ood)

        # mean over the MC prediction samples for this checkpoint
        pred_k_in_mean = np.mean(np.stack(pred_k['in']), axis=0)
        pred_k_skew_mean = np.mean(np.stack(pred_k['skew']), axis=0)
        pred_k_ood_mean = np.mean(np.stack(pred_k['ood']), axis=0)

        pred_accum['in'].append(pred_k_in_mean)
        pred_accum['skew'].append(pred_k_skew_mean)
        pred_accum['ood'].append(pred_k_ood_mean)

    # if ensemble, then take the mean
    pred['in'] = np.mean(np.stack(pred_accum['in']), axis=0)
    pred['skew'] = np.mean(np.stack(pred_accum['skew']), axis=0)
    pred['ood'] = np.mean(np.stack(pred_accum['ood']), axis=0)

    # prediction accuracy for in-dist.
    pred['in_true_labels'] = data.test_in_sample_labels
    acc = np.mean(data.test_in_sample_labels == np.argmax(pred['in'], axis=1))
    print('== (optionally ensemble) acc={} =='.format(acc))

    # AUC of separating in-dist. from skewed/OOD by max softmax probability;
    # pos_label=0 because lower confidence should indicate the positive
    # (non-in-dist.) class
    print('== eval in and skew using max(Py|x) ==')
    neg = list(np.max(pred['in'], axis=1))
    pos = list(np.max(pred['skew'], axis=1))
    print('auc={}'.format(compute_auc(neg, pos, pos_label=0)))

    print('== eval in and ood using max(Py|x) ==')
    neg = list(np.max(pred['in'], axis=1))
    pos = list(np.max(pred['ood'], axis=1))
    print('auc={}'.format(compute_auc(neg, pos, pos_label=0)))

    # save the predictions
    pred_file_name = 'pred_nensemb{}_npred{}_tempscale{}.pkl'.format(
        len(pred_accum['in']), FLAGS.n_pred_sample, FLAGS.is_tempscale)
    with tf.gfile.Open(os.path.join(model_dir, pred_file_name), 'wb') as f:
        pickle.dump(pred, f, protocol=2)
# Exemplo n.º 10
    def __init__(self,
                 obs_size,
                 action_size,
                 actor_model=None,
                 critic_model=None,
                 use_target_network=False,
                 learning_rate=1e-3,
                 reward_discount=0.99,
                 tau=0.001):
        """Build an actor-critic agent and its TF1 training graph.

        Args:
            obs_size: Observation shape (iterable; unpacked into
                placeholder shapes and passed as Keras input_shape).
            action_size: Dimensionality of the continuous action vector.
            actor_model: Optional Keras actor network; when None, a default
                MLP with a tanh output is built.
            critic_model: Optional Keras critic taking [state, action];
                when None, a default MLP Q-network is built.
            use_target_network: If True, clone actor and critic into
                (untrained) target networks.
            learning_rate: Adam learning rate for the critic; the actor is
                trained with learning_rate / 10.
            reward_discount: Discount factor (stored as self.rd).
            tau: Soft target-update coefficient (stored; not used here).

        NOTE(review): relies on a global TF1 session `sess` and on
        self.q_loss / self.action_expected_reward defined elsewhere in the
        class — confirm those exist before constructing.
        """
        self.obs_size = obs_size
        self.action_size = action_size
        self.use_target_network = use_target_network
        self.lr = learning_rate
        self.rd = reward_discount
        self.tau = tau

        # Create models if not provided
        if actor_model is None:
            actor_model = models.Sequential()
            actor_model.add(
                layers.Dense(16, input_shape=obs_size, activation='relu'))
            actor_model.add(layers.Dense(16, activation='relu'))
            actor_model.add(layers.Dense(16, activation='relu'))
            # tanh keeps actions in [-1, 1]
            actor_model.add(
                layers.Dense(action_size, name='action', activation='tanh'))
            actor_model.summary()
        self.actor_model = actor_model

        if critic_model is None:
            # Critic scores (state, action) pairs with a single Q-value.
            state_input = layers.Input(shape=obs_size)
            action_input = layers.Input(shape=action_size)
            all_input = layers.Concatenate()([state_input, action_input])
            h1 = layers.Dense(32, activation='relu')(all_input)
            h2 = layers.Dense(32, activation='relu')(h1)
            h3 = layers.Dense(32, activation='relu')(h2)
            output = layers.Dense(1, name='q-value')(h3)
            critic_model = models.Model(inputs=[state_input, action_input],
                                        outputs=output)
            critic_model.summary()
        self.critic_model = critic_model

        if use_target_network:
            # Clones share architecture but get fresh (untrained) weights.
            self.target_network_critic = tf.keras.models.clone_model(
                self.critic_model)
            self.target_network_actor = tf.keras.models.clone_model(
                self.actor_model)

        # TF1 placeholders for a batch of transitions.
        # NOTE(review): shape=(None) is scalar None (fully unknown shape),
        # not the 1-D (None,) — confirm this is intended for rewards/is_done.
        self.state_ph = tf.placeholder(tf.float32, shape=(None, *obs_size))
        self.actions_ph = tf.placeholder(tf.float32, shape=(None, action_size))
        self.rewards_ph = tf.placeholder(tf.float32, shape=(None))
        self.next_states_ph = tf.placeholder(tf.float32,
                                             shape=(None, *obs_size))
        self.is_done_ph = tf.placeholder(tf.float32, shape=(None))

        self.loss = self.q_loss(self.state_ph, self.actions_ph,
                                self.rewards_ph, self.next_states_ph,
                                self.is_done_ph)
        aer = self.action_expected_reward(self.state_ph)

        # Critic minimizes TD loss; actor maximizes expected reward (hence
        # minimizing -aer) at a 10x smaller learning rate.
        self.train_critic_step = tf.train.AdamOptimizer(
            learning_rate).minimize(
                self.loss, var_list=self.critic_model.trainable_variables)
        self.train_actor_step = tf.train.AdamOptimizer(
            learning_rate / 10).minimize(
                -aer, var_list=self.actor_model.trainable_variables)
        sess.run(tf.global_variables_initializer())