def create_img2attr(self, kernel_initializer = 'he_normal', img_flat_len = 1024):
    """Build a model that predicts the 50-dim attribute vector from image features.

    Args:
        kernel_initializer: initializer for the (currently unused) attr_dense layer.
        img_flat_len: width of the flattened image-feature input.

    Returns:
        Compiled Keras model with inputs [attr, wv, img] and output [attr_preds];
        the binary-crossentropy loss is attached via add_loss, so compile uses loss=None.
    """
    attr_input = layers.Input(shape = (50,), name = 'attr')
    word_emb = layers.Input(shape = (600,), name = 'wv')
    imag_classifier = layers.Input(shape = (img_flat_len,), name = 'img')
    # NOTE(review): attr_dense / attr_word_emb below are built but never feed
    # attr_preds or the loss — they are disconnected from the output graph.
    # The prediction path runs on imag_classifier alone. Confirm whether this
    # branch was meant to be used before removing it (removal would also shift
    # auto-generated layer names).
    attr_dense = layers.Dense(600, use_bias = True, kernel_initializer=kernel_initializer,
                              kernel_regularizer = l2(1e-4), name = 'attr_dense')(attr_input)
    attr_word_emb = layers.Concatenate(name = 'attr_word_emb')([word_emb, attr_dense])
    out_size = 50
    # Funnel image features down to the attribute dimension through wide ReLU layers.
    attr_preds = self.full_connect_layer(imag_classifier, hidden_dim = [
                                        int(out_size * 20),
                                        int(out_size * 15),
#                                         int(out_size * 7),
#                                         int(img_flat_len * 1.125),
#                                         int(img_flat_len * 1.0625)
                                        ], \
                                        activation = 'relu', resnet = False, drop_out_ratio = 0.2)
    # Sigmoid head: each of the 50 attributes is an independent binary prediction.
    attr_preds = self.full_connect_layer(attr_preds, hidden_dim = [out_size], activation = 'sigmoid')
    # Loss compares predicted attributes against the 'attr' input itself.
    log_loss = K.mean(binary_crossentropy(attr_input, attr_preds))
    model = Model([attr_input, word_emb, imag_classifier], outputs = [attr_preds]) #, vgg_output])
    model.add_loss(log_loss)
    model.compile(optimizer=Adam(lr=1e-5), loss=None)
    return model
def create_rnn_model(self):
    """Build a next-step sequence predictor over a 1-D dense sequence.

    The model maps a (dense_input_len, 1) sequence through two stacked
    bidirectional LSTMs and a per-timestep MLP head. The self-supervised
    loss aligns prediction t with input t+1 (seq_input[:, 1:] vs
    seq_pred[:, :-1]), so the network learns one-step-ahead forecasting.

    Returns:
        Compiled Keras model (loss attached via add_loss, hence loss=None).
    """
    seq_input = Input(shape=(self.dense_input_len, 1))
    # Fix: the original also created an unused `seq_output` Input — dead code, removed.
    rnn_out = Bidirectional(
        LSTM(self.rnn_units[0], return_sequences=True, activation='relu'))(seq_input)
    rnn_out = Bidirectional(
        LSTM(self.rnn_units[1], return_sequences=True, activation='relu'))(rnn_out)
    # Per-timestep head: hidden projection then scalar prediction.
    seq_pred = TimeDistributed(Dense(self.hidden_dim[0], activation='relu'))(rnn_out)
    seq_pred = TimeDistributed(Dense(1, activation='relu'))(seq_pred)
    # Drop the trailing channel dim so the shifted MSE below is over (batch, time).
    seq_pred = Reshape((self.dense_input_len, ))(seq_pred)
    seq_input_reshape = Reshape((self.dense_input_len, ))(seq_input)
    model = Model(seq_input, seq_pred)
    # Next-step objective: prediction at t should match the input at t+1.
    loss = K.mean(
        mean_squared_error(seq_input_reshape[:, 1:], seq_pred[:, :-1]))
    model.add_loss(loss)
    model.compile(optimizer='adam', loss=None)
    return model
def create_dem_aug(self, kernel_initializer = 'he_normal', img_flat_len = 1024):
    """Build a DEM model that regresses image features from attribute + word embeddings.

    Takes raw 64x64x3 images, extracts features with the (frozen) img_flat_model,
    and trains an MLP on [word embedding (+ attr embedding)] to reproduce those
    features under an MSE loss attached via add_loss.

    Args:
        kernel_initializer: initializer for the attr_dense projection.
        img_flat_len: width of the image-feature vector produced by img_flat_model.

    Returns:
        Compiled Keras model with inputs [img, attr, wv] and outputs
        [attr_word_emb_dense, imag_classifier].
    """
    attr_input = layers.Input(shape = (50,), name = 'attr')
    word_emb = layers.Input(shape = (600,), name = 'wv')
    img_input = layers.Input(shape = (64, 64, 3))
    # imag_classifier = layers.Input(shape = (img_flat_len,), name = 'img')
    # Freeze the feature extractor: only the embedding->feature MLP trains.
    self.img_flat_model.trainable = False
    imag_classifier = self.img_flat_model(img_input)
    # Project 50-dim attributes into the same 600-dim space as the word vectors.
    attr_dense = layers.Dense(600, use_bias = True, kernel_initializer=kernel_initializer,
                              kernel_regularizer = l2(1e-4), name = 'attr_dense')(attr_input)
    if self.only_emb:
        # Embedding-only variant: ignore the attribute branch entirely.
        attr_word_emb = word_emb
    else:
        attr_word_emb = layers.Concatenate(name = 'attr_word_emb')([word_emb, attr_dense])
    # Widen then narrow towards the image-feature dimensionality.
    attr_word_emb_dense = self.full_connect_layer(attr_word_emb, hidden_dim = [
                                        int(img_flat_len * 2),
                                        int(img_flat_len * 1.5),
                                        int(img_flat_len * 1.25),
#                                         int(img_flat_len * 1.125),
#                                         int(img_flat_len * 1.0625)
                                        ], \
                                        activation = 'relu', resnet = False, drop_out_ratio = 0.2)
    attr_word_emb_dense = self.full_connect_layer(attr_word_emb_dense, hidden_dim = [img_flat_len],
                                                  activation = 'relu')
    # Regression target: the frozen image features.
    mse_loss = K.mean(mean_squared_error(imag_classifier, attr_word_emb_dense))
    model = Model([img_input, attr_input, word_emb], outputs = [attr_word_emb_dense, imag_classifier]) #, vgg_output])
    model.add_loss(mse_loss)
    model.compile(optimizer=Adam(lr=1e-4), loss=None)
    return model
def GetModel():
    """Build a frozen-MobileNetV2 embedding network and its triplet wrapper.

    Returns:
        (embedding_model, triplet_model): the L2-normalized embedding network,
        and a three-input siamese model whose triplet loss is attached via
        add_loss (module-level `triplet_loss`, `embedding_dim`, `image_size`).
    """
    backbone = MobileNetV2(input_shape=(224, 224, 3),
                           weights='imagenet',
                           include_top=False,
                           pooling='max')
    # Transfer-learning setup: keep all ImageNet weights fixed.
    for frozen in backbone.layers:
        frozen.trainable = False

    head = Dropout(0.6)(backbone.output)
    head = Dense(embedding_dim)(head)
    # Unit-norm embeddings so distances live on the hypersphere.
    head = Lambda(lambda t: K.l2_normalize(t, axis=1))(head)
    embedding_model = Model(backbone.input, head, name='embedding')

    shape = (image_size, image_size, 3)
    triplet_inputs = [Input(shape, name=role + '_input')
                      for role in ('anchor', 'positive', 'negative')]
    # Shared weights: the same embedding network processes all three branches.
    triplet_outputs = [embedding_model(branch) for branch in triplet_inputs]

    triplet_model = Model(triplet_inputs, triplet_outputs)
    triplet_model.add_loss(K.mean(triplet_loss(triplet_outputs)))
    return embedding_model, triplet_model
def train(self, data):
    """Pretrain the latent layers of the model.

    Builds an MLP encoder/decoder pair, assembles them into a CVAE with a
    reconstruction + KL loss attached via add_loss, optionally loads
    pre-trained weights, trains on `data`, and returns the encoder's z_mean
    for `data`.

    Args:
        data: 2-D array-like, samples x features (original_dim = data.shape[1]).

    Returns:
        self.encoder_z_mean — NOTE(review): computed below *before* fit() runs,
        so this is the pre-training mean; confirm whether a post-training
        predict was intended.
    """
    # network parameters
    original_dim = data.shape[1]
    input_shape = (original_dim,)
    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    hidden = inputs
    # Stack sigmoid hidden layers per configured widths.
    for i, hidden_dim in enumerate(self.hidden_dim, 1):
        hidden = Dense(hidden_dim, activation='sigmoid',
                       name='hidden_e_{}'.format(i))(hidden)
        logger.debug("Hooked up hidden layer with %d neurons" % hidden_dim)
    z_mean = Dense(params_training.num_latent, activation=None, name='z_mean')(hidden)
    z_log_sigma = Dense(params_training.num_latent, activation=None,
                        name='z_log_sigma')(hidden)
    # Reparameterization trick via self.sampling.
    z = Lambda(self.sampling, output_shape=(params_training.num_latent,), name='z')(
        [z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')
    # NOTE(review): predict happens here, before training — see docstring.
    self.encoder_z_mean = encoder.predict(data)[0]
    # build decoder model
    latent_inputs = Input(shape=(params_training.num_latent,), name='z_sampling')
    hidden = latent_inputs
    for i, hidden_dim in enumerate(self.hidden_dim[::-1], 1):  # Reverse because decoder.
        hidden = Dense(hidden_dim, activation='sigmoid',
                       name='hidden_d_{}'.format(i))(hidden)
        logger.debug("Hooked up hidden layer with %d neurons" % hidden_dim)
    # if hidden == latent_inputs:
    #     logger.warning("No Hidden layers hooked up.")
    outputs = Dense(original_dim, activation='sigmoid')(hidden)
    decoder = Model(latent_inputs, outputs, name='decoder')
    # Build the CVAE auto-encoder
    outputs = decoder(encoder(inputs)[2])
    cvae_model = Model(inputs, outputs, name='cvae')
    # Load the pre-trained weights.
    # NOTE(review): load_pretrain_weights is invoked again below behind
    # `if self.pretrain_weights` — confirm the double call is intentional.
    self.load_pretrain_weights(cvae_model)
    # Standard VAE objective: per-feature BCE scaled to the input dim, plus KL.
    reconstruction_loss = binary_crossentropy(inputs, outputs) * original_dim
    kl_loss = 1 + z_log_sigma - tf.square(z_mean) - tf.exp(z_log_sigma)
    kl_loss = -0.5 * tf.reduce_sum(kl_loss, axis=-1)
    cvae_model.add_loss(tf.reduce_mean(reconstruction_loss + kl_loss))
    cvae_model.compile(optimizer='adam')
    cvae_model.summary()
    # First load the weights from the pre-training
    if self.pretrain_weights:
        cvae_model = self.load_pretrain_weights(cvae_model)
    saver = ModelCheckpoint(
        check_path(TEMPORARY_CVAE_PATH),
        save_weights_only=True,
        verbose=1
    )
    tensorboard_config = TensorBoard(log_dir=check_path(TEMPORARY_CVAE_PATH))
    # train the auto-encoder
    cvae_model.fit(data,
                   epochs=params_training.num_epochs,
                   batch_size=params_training.batch_size,
                   callbacks=[saver, tensorboard_config])
    return self.encoder_z_mean
def train(self, data):
    """Pretrain the latent layers of the model.

    Builds a minimal VAE (linear z_mean/z_log_sigma heads straight off the
    input, single sigmoid decoder layer), trains it on `data`, then appends
    the learned z_mean/z_log_sigma weights to self.weights and the decoder
    layer weights to self.de_weights for later reuse.

    Args:
        data: 2-D array-like, samples x features.
    """
    # network parameters
    original_dim = data.shape[1]
    input_shape = (original_dim, )
    batch_size = train_params.batch_size
    latent_dim = train_params.num_latent
    epochs = train_params.num_epochs
    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    # No noise injected here despite the name — kept for symmetry with the
    # denoising variant of this trainer.
    inputs_noisy = inputs
    z_mean = Dense(latent_dim, activation=None, name='z_mean')
    z_mean = z_mean(inputs_noisy)
    z_log_sigma = Dense(latent_dim, activation=None, name='z_log_sigma')
    z_log_sigma = z_log_sigma(inputs_noisy)
    # Reparameterization trick via self.sampling.
    z = Lambda(self.sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')
    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    outputs = Dense(original_dim, activation='sigmoid',
                    name="decoder_l")(latent_inputs)
    decoder = Model(latent_inputs, outputs, name='decoder')
    # Build the DAE
    outputs = decoder(encoder(inputs)[2])
    latent_model = Model(inputs, outputs, name='vae_mlp')
    # VAE objective: scaled BCE reconstruction + KL divergence.
    reconstruction_loss = binary_crossentropy(inputs, outputs) * original_dim
    kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    latent_model.add_loss(vae_loss)
    latent_model.compile(optimizer='adam')
    saver = ModelCheckpoint(check_path(TEMPORARY_LATENT_PATH),
                            save_weights_only=True,
                            verbose=1)
    tensorboard_config = TensorBoard(
        log_dir=check_path(TEMPORARY_LATENT_PATH))
    logger.info("Model checkpoints has ben saved.")
    # train the autoencoder
    latent_model.fit(data,
                     epochs=epochs,
                     batch_size=batch_size,
                     callbacks=[saver, tensorboard_config])
    # Collect the weights for z_log_sigma and z_mean, the layers being pretrained.
    # Order matters: downstream consumers index self.weights positionally.
    self.weights.append(
        latent_model.get_layer("encoder").get_layer(
            "z_mean").get_weights())
    self.weights.append(
        latent_model.get_layer("encoder").get_layer(
            "z_log_sigma").get_weights())
    self.de_weights.append(
        latent_model.get_layer("decoder").get_layer(
            "decoder_l").get_weights())
    logger.info("Weights has been updated successfully.")
def create_model(self):
    """Assemble and compile a single-graph MLP variational autoencoder.

    Encoder: one ReLU hidden layer -> (z_mean, z_log_var) -> sampled z.
    Decoder: one ReLU hidden layer -> sigmoid reconstruction.
    The loss (reconstruction + KL, selected by self.mse) is attached with
    add_loss, so compile uses loss=None.

    Returns:
        The compiled Keras VAE model.
    """
    # ---- encoder ----
    inputs = Input(shape=(self.original_dim, ), name='encoder_input')
    enc_hidden = Dense(self.intermediate_dim, activation='relu')(inputs)
    z_mean = Dense(self.latent_dim, name='z_mean')(enc_hidden)
    z_log_var = Dense(self.latent_dim, name='z_log_var')(enc_hidden)
    # Reparameterization trick: sampling moved into a Lambda layer so
    # gradients flow through z_mean / z_log_var.
    z = Lambda(self.sampling, name='z')([z_mean, z_log_var])

    # ---- decoder (inlined into the same graph) ----
    dec_hidden = Dense(self.intermediate_dim, activation='relu')(z)
    outputs = Dense(self.original_dim, activation='sigmoid')(dec_hidden)

    vae = Model(inputs, outputs, name='vae_mlp')

    # ---- loss: reconstruction term (MSE or BCE) plus KL divergence ----
    if self.mse:
        recon = mean_squared_error(inputs, outputs)
    else:
        recon = binary_crossentropy(inputs, outputs)
    recon = recon * self.original_dim
    kl = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                      axis=-1)
    vae.add_loss(K.mean(recon + kl))
    vae.compile(optimizer='adam', loss=None)
    return vae
def create_dem_bc_aug(self, kernel_initializer = 'he_normal', img_flat_len = 1024, only_emb = False):
    """Build a binary-classification DEM over raw images.

    Image features come from self.img_flat_model; attribute + word embeddings
    are projected to the feature dimension, fused with the image features by
    elementwise product, and scored by a small sigmoid head. Trained with
    binary crossentropy against the `label` input (attached via add_loss).

    Args:
        kernel_initializer: initializer for the 'dense' attr transform.
        img_flat_len: width of the image feature vector.
        only_emb: if True, skip the attribute branch and use word vectors only.

    Returns:
        Compiled Keras model, inputs [img, attr, wv, label],
        outputs [attr_word_emb_dense, out, imag_classifier].
    """
    attr_input = layers.Input(shape = (self.attr_len,), name = 'attr')
    word_emb = layers.Input(shape = (self.wv_len,), name = 'wv')
    img_input = layers.Input(shape = (self.pixel, self.pixel, 3))
    label = layers.Input(shape = (1,), name = 'label')
    # img_flat_model = Model(inputs = self.img_model[0].inputs,
    #                        outputs = self.img_model[0].get_layer(name = 'avg_pool').output)
    imag_classifier = self.img_flat_model(img_input)
    # Two alternative attribute encodings, selected by configuration.
    if self.attr_emb_transform == 'flat':
        # NOTE(review): 294 is a hard-coded vocabulary size for the attribute
        # embedding — confirm it matches the dataset.
        attr_emb = layers.Embedding(294, self.attr_emb_len)(attr_input)
        attr_dense = layers.Flatten()(attr_emb)  #layers.GlobalAveragePooling1D()(attr_emb)
    elif self.attr_emb_transform == 'dense':
        attr_dense = layers.Dense(self.attr_emb_len, use_bias = True,
                                  kernel_initializer=kernel_initializer,
                                  kernel_regularizer = l2(1e-4), name = 'attr_dense')(attr_input)
    if only_emb:
        attr_word_emb = word_emb
    else:
        attr_word_emb = layers.Concatenate(name = 'attr_word_emb')([word_emb, attr_dense])
    # Funnel the fused embedding down to img_flat_len.
    attr_word_emb_dense = self.full_connect_layer(attr_word_emb, hidden_dim = [
#                                         int(img_flat_len * 4),
                                        int(img_flat_len * 2),
                                        int(img_flat_len * 1.5),
                                        int(img_flat_len * 1.25),
#                                         int(img_flat_len * 1.125),
                                        int(img_flat_len)
                                        ], \
                                        activation = 'relu', resnet = False, drop_out_ratio = 0.2)
    # attr_word_emb_dense = self.full_connect_layer(attr_word_emb_dense, hidden_dim = [img_flat_len],
    #                                               activation = 'relu')
    # Fuse semantic and visual features by elementwise product.
    attr_x_img = layers.Lambda(lambda x: x[0] * x[1],
                               name = 'attr_x_img')([attr_word_emb_dense, imag_classifier])
    # attr_x_img = layers.Concatenate(name = 'attr_x_img')([attr_word_emb_dense, imag_classifier])
    # Scoring head wrapped in its own sub-model so it can be reused/inspected.
    attr_img_input = layers.Input(shape = (img_flat_len,), name = 'attr_img_input')
    # attr_img_input = layers.Input(shape = (img_flat_len * 2,), name = 'attr_img_input')
    proba = self.full_connect_layer(attr_img_input, hidden_dim = [1], activation = 'sigmoid')
    attr_img_model = Model(inputs = attr_img_input, outputs = proba, name = 'attr_x_img_model')
    out = attr_img_model([attr_x_img])
    # dem_bc_model = self.create_dem_bc(kernel_initializer = 'he_normal',
    #                                   img_flat_len = img_flat_len,
    #                                   only_emb = only_emb)
    # attr_word_emb_dense, out = dem_bc_model([imag_classifier, attr_input, word_emb, label])
    bc_loss = K.mean(binary_crossentropy(label, out))
    model = Model([img_input, attr_input, word_emb, label],
                  outputs = [attr_word_emb_dense, out, imag_classifier])
    model.add_loss(bc_loss)
    model.compile(optimizer=Adam(lr=1e-4), loss=None)
    return model
def create_res_dem_bc(self, kernel_initializer = 'he_normal', img_flat_len = 1024, only_emb = False):
    """Build a residual DEM binary classifier on top of a frozen embedding-only DEM.

    Loads a pre-trained embedding-only model from './only_emb.h5', freezes its
    word-embedding -> feature branch, and learns a trainable residual that is
    added to the frozen branch's output before fusing with image features.

    Args:
        kernel_initializer: initializer for the attr_dense projection.
        img_flat_len: width of the image feature vector.
        only_emb: if True, the trainable branch also uses word vectors only.

    Returns:
        Compiled Keras model, inputs [img, attr, wv, label],
        outputs [attr_word_emb_dense, out]. Loss attached via add_loss.

    Raises:
        OSError: if './only_emb.h5' is missing (load_weights).
    """
    attr_input = layers.Input(shape = (50,), name = 'attr')
    word_emb = layers.Input(shape = (self.wv_len,), name = 'wv')
    imag_classifier = layers.Input(shape = (img_flat_len,), name = 'img')
    label = layers.Input(shape = (1,), name = 'label')
    attr_dense = layers.Dense(self.wv_len, use_bias = True,
                              kernel_initializer=kernel_initializer,
                              kernel_regularizer = l2(1e-4), name = 'attr_dense')(attr_input)
    # Frozen base: the embedding-only DEM trained previously.
    ini_dem_model = self.create_dem_bc(kernel_initializer = 'he_normal',
                                       img_flat_len = img_flat_len,
                                       only_emb = True)
    ini_dem_model.load_weights('./only_emb.h5')
    # Keep only the wv -> feature sub-graph (inputs[2] is the 'wv' input —
    # depends on the input ordering inside create_dem_bc).
    ini_dem_model_part = Model(inputs = ini_dem_model.inputs[2],
                               outputs = ini_dem_model.outputs[0])
    ini_dem_model_part.trainable = False
    ini_attr_word_emb_dense = ini_dem_model_part([word_emb])
    if only_emb:
        attr_word_emb = word_emb
    else:
        attr_word_emb = layers.Concatenate(name = 'attr_word_emb')([word_emb, attr_dense])
    attr_word_emb_dense = self.full_connect_layer(attr_word_emb, hidden_dim = [
                                        int(img_flat_len * 2),
                                        int(img_flat_len * 1.5),
                                        int(img_flat_len * 1.25),
                                        int(img_flat_len)
                                        ], \
                                        activation = 'relu', resnet = False, drop_out_ratio = 0.2)
    # Residual connection: trainable branch output + frozen base output.
    attr_word_emb_dense = layers.Lambda(lambda x: x[0] + x[1])([attr_word_emb_dense,
                                                                ini_attr_word_emb_dense])
    # Fuse semantic and visual features by elementwise product.
    attr_x_img = layers.Lambda(lambda x: x[0] * x[1],
                               name = 'attr_x_img')([attr_word_emb_dense, imag_classifier])
    # attr_x_img = layers.Concatenate(name = 'attr_x_img')([attr_word_emb_dense, imag_classifier])
    attr_img_input = layers.Input(shape = (img_flat_len,), name = 'attr_img_input')
    # attr_img_input = layers.Input(shape = (img_flat_len * 2,), name = 'attr_img_input')
    proba = self.full_connect_layer(attr_img_input, hidden_dim = [1], activation = 'sigmoid')
    attr_img_model = Model(inputs = attr_img_input, outputs = proba, name = 'attr_x_img_model')
    out = attr_img_model([attr_x_img])
    bc_loss = K.mean(binary_crossentropy(label, out))
    model = Model([imag_classifier, attr_input, word_emb, label],
                  outputs = [attr_word_emb_dense, out])
    model.add_loss(bc_loss)
    model.compile(optimizer=Adam(lr=1e-4), loss=None)
    return model
def build_vae_model(self):
    """Wire encoder, sampling layer and decoder into a convolutional VAE.

    Uses self.build_encoder / self.build_decoder factories configured from
    self.params, samples z through vae_z, and attaches the standard
    BCE-reconstruction + KL loss via add_loss.

    Returns:
        The (uncompiled) Keras VAE model over self.input.
    """
    cfg = self.params
    encoder = self.build_encoder(filters=cfg['enc_filters'],
                                 kernels=cfg['enc_kernels'],
                                 strides=cfg['enc_strides'])
    # Encoder also returns its hidden activation, unused here.
    _, mu, log_var = encoder(self.input)

    # Reparameterized latent sample.
    latent = vae_z(latent_dim=self.latent_dim)(mu, log_var)
    # z = Concatenate()([z_mean, z_log_var, z])

    decoder = self.build_decoder(filters=cfg['dec_filters'],
                                 kernels=cfg['dec_kernels'],
                                 strides=cfg['dec_strides'])
    reconstruction = decoder(latent)

    # instantiate VAE model
    vae = Model(self.input, reconstruction)

    # Compute VAE loss: pixel-scaled BCE + KL divergence.
    pixel_count = self.image_size[0] * self.image_size[1]
    recon_term = pixel_count * metrics.binary_crossentropy(
        K.flatten(self.input), K.flatten(reconstruction))
    kl_term = -0.5 * K.sum(
        1 + log_var - K.square(mu) - K.exp(log_var), axis=-1)
    vae.add_loss(K.mean(recon_term + kl_term))
    return vae
def create_gcn(self, img_flat_len = 1024):
    """Build a GCN-style model mapping class attributes + word embeddings to image features.

    The class attribute matrix, word-embedding matrix and cosine-similarity
    adjacency graph are baked in as constant-tensor Inputs; graph convolutions
    happen inside full_connect_layer via the adj_graphs argument. The MSE loss
    compares the row selected by class_index against the image features.

    Args:
        img_flat_len: width of the image feature vector.

    Returns:
        Compiled Keras model outputting the full per-class classifier matrix.
    """
    # Adjacency = cosine similarity of the first 300 dims of class embeddings.
    adj_graph = 1 - sklearn.metrics.pairwise.pairwise_distances(
        np.array(list(self.class_id_emb_attr['emb']))[:, :300], metric = 'cosine')
    # Constant-tensor Inputs: these carry fixed data, not per-batch values.
    attr_input = layers.Input(tensor= tf.constant(np.array(list(self.class_id_emb_attr['attr']),
                                                           dtype = 'float32')))
    all_word_emb = layers.Input(tensor= tf.constant(extract_array_from_series(self.class_id_emb_attr['emb']),
                                                    dtype = 'float32'))  #Input(shape = (230, 300,), name = 'wv')
    class_index = layers.Input(shape = (1, ), name = 'class_index', dtype = 'int32')
    adj_graphs = layers.Input(tensor=tf.constant(adj_graph, dtype = 'float32'))  #Input(shape = (230, 230,), name = 'adj_graph')
    imag_classifier = layers.Input(shape = (img_flat_len,), name = 'img')
    attr_dense = layers.Dense(600, use_bias = False, kernel_initializer='he_normal',
                              kernel_regularizer = l2(1e-4))(attr_input)
    attr_word_emb = layers.Concatenate()([all_word_emb, attr_dense])
    # Graph-convolutional funnel towards the image-feature dimension.
    all_classifier = self.full_connect_layer(attr_word_emb, hidden_dim = [
                                        int(img_flat_len * 2),
                                        int(img_flat_len * 1.5),
                                        int(img_flat_len * 1.25 ),
#                                         img_flat_len
                                        ],
                                        activation = 'relu', adj_graphs = adj_graphs,
                                        drop_out_ratio = 0.2)
    all_classifier = self.full_connect_layer(all_classifier, hidden_dim = [img_flat_len],
                                             activation = 'relu', adj_graphs = adj_graphs)
    # Pick out the classifier row(s) for the batch's class indices.
    x = tf.gather_nd(all_classifier, class_index)
    mse_loss = K.mean(mean_squared_error(imag_classifier, x))
    model = Model([class_index, imag_classifier, attr_input, all_word_emb, adj_graphs],
                  outputs = [all_classifier])  #, vgg_output])
    model.add_loss(mse_loss)
    model.compile(optimizer=Adam(lr=1e-4), loss=None)
    # model.summary()
    return model
def adr(frames, actions, states, context_frames, Ec, Eo, A, Do, Da, La=None, gaussian_a=False,
        use_seq_len=12, lstm_units=256, lstm_layers=1, learning_rate=0.001, random_window=True,
        reconstruct_random_frame=True):
    """Build the ADR autoencoder: reconstruct the residual between a frame and
    its action-only prediction.

    Components (all passed in as callables/models): Ec context encoder,
    Eo residual encoder, A action encoder, Da action-only decoder,
    Do residual decoder, La optional gaussian action-latent module.

    Assumes frames is (batch, seq, w, h, c) — static shape required, since all
    five dims are read with int(). Returns a compiled Keras model whose loss
    (residual MSE plus pos/neg components) is attached via add_loss.
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len > use_seq_len
    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames, actions, states, use_seq_len=use_seq_len, random_window=random_window,
        gaussian=gaussian_a, a_units=lstm_units, a_layers=lstm_layers)

    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0), (-1, context_frames, -1, -1, -1))
    x_to_recover = frame_inputs
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    # Keep only the last context step's encoding and skip connections.
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_0, start=context_frames - 1, length=1)

    if reconstruct_random_frame:
        # Train on a single randomly chosen frame: actions up to that index
        # are kept, and the target becomes just that frame.
        a_s_dim = action_state.shape[-1]
        rand_index_1 = tf.random.uniform((), minval=0, maxval=use_seq_len, dtype='int32')
        action_state = tf.slice(action_state, (0, 0, 0), (bs, rand_index_1 + 1, a_s_dim))
        x_to_recover = tf.slice(frames, (0, rand_index_1, 0, 0, 0), (bs, 1, w, h, c))
        n_frames = rand_index_1 + 1
    else:
        skips = repeat_skips(skips, use_seq_len)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian_a:
        # Append the sampled action latent za to the conditioning vector.
        _, za, _, _ = La([hc_ha, initial_state])
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    if reconstruct_random_frame:
        # Keep only the last timestep of the conditioning tensors.
        _, hc_ha = tf.split(hc_ha, [-1, 1], axis=1)
        _, ha = tf.split(ha, [-1, 1], axis=1)
        hc_repeat = hc_0

    x_rec_a = Da([hc_ha, skips])  # action-only reconstruction

    # --> Changed the input to Eo from the error image to the full frame and the action only prediction
    x_rec_a_pos = K.relu(x_to_recover - x_rec_a)
    x_rec_a_neg = K.relu(x_rec_a - x_to_recover)
    # xo_rec_a = K.concatenate([x_rec_a_pos, x_rec_a_neg], axis=-1)
    xo_rec_a = K.concatenate([x_to_recover, x_rec_a], axis=-1)

    ho, _ = Eo(xo_rec_a)
    # ho = Eo(xo_rec_a)
    h = K.concatenate([hc_repeat, ha, ho], axis=-1)

    # multiple reconstruction
    # Do emits pos/neg error channels; their difference is the residual image.
    x_err = Do([h, skips])
    x_err_pos = x_err[:, :, :, :, :3]
    x_err_neg = x_err[:, :, :, :, 3:]
    x_recovered = x_err_pos - x_err_neg
    # Target is the signed residual between the frame and the action-only guess.
    x_target = x_to_recover - x_rec_a
    x_target_pos = x_rec_a_pos
    x_target_neg = x_rec_a_neg

    # == Autoencoder
    model = Model(inputs=ins, outputs=x_recovered)
    rec_loss = mean_squared_error(x_target, x_recovered)
    model.add_metric(K.mean(rec_loss), name='rec_loss', aggregation='mean')
    rec_loss_pos = mean_squared_error(x_target_pos, x_err_pos)
    model.add_metric(rec_loss_pos, name='rec_loss_pos', aggregation='mean')
    rec_loss_neg = mean_squared_error(x_target_neg, x_err_neg)
    model.add_metric(rec_loss_neg, name='rec_loss_neg', aggregation='mean')
    # Action-only reconstruction quality is tracked as a metric but NOT
    # included in the training loss below.
    rec_action_only_loss = mean_squared_error(x_rec_a, x_to_recover)
    model.add_metric(rec_action_only_loss, name='rec_A', aggregation='mean')
    model.add_loss(
        K.mean(rec_loss) + (K.mean(rec_loss_pos) + K.mean(rec_loss_neg)))
    model.compile(optimizer=Adam(lr=learning_rate))
    return model
models = (encoder, decoder) #data = (x_test, y_test) # VAE loss = mse_loss or xent_loss + kl_loss # if args.mse: # reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs)) # else: reconstruction_loss = binary_crossentropy(K.flatten(inputs), K.flatten(outputs)) reconstruction_loss *= 120 * 160 * 3 kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='rmsprop') vae.summary() plot_model(vae, to_file='vae_cnn.png', show_shapes=True) train_gen = generator(opts, gen_records, cfg.BATCH_SIZE, True) val_gen = generator(opts, gen_records, cfg.BATCH_SIZE, False) # if args.weights: # vae.load_weights(args.weights) # else: # train the autoencoder total_records = len(gen_records) num_train = 0 num_val = 0
def train(self, data):
    """Pretrain the latent layers of the model.

    Builds a stacked denoising VAE whose every Dense layer is initialized
    from self.pretrain (consumed sequentially via layer_num), trains it on
    `data` with a BCE + KL loss, and checkpoints to TEMPORARY_SDAE_PATH.

    Args:
        data: 2-D array-like, samples x features.
    """
    # network parameters
    original_dim = data.shape[1]
    input_shape = (original_dim, )
    batch_size = params_training.batch_size
    latent_dim = params_training.num_latent
    epochs = params_training.num_epochs
    # layer_num walks through self.pretrain in construction order — the
    # encoder stack, then z_mean, z_log_sigma, then the decoder stack.
    layer_num = 0

    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    # Denoising: corrupt the input with gaussian noise during training.
    inputs_noisy = GaussianNoise(stddev=0.1)(inputs)
    hidden = inputs_noisy
    for i, hidden_dim in enumerate(self.hidden_dim, 1):
        hidden_layer = Dense(hidden_dim, activation='sigmoid',
                             name='hidden_e_{}'.format(i),
                             weights=self.pretrain[layer_num])
        hidden = hidden_layer(hidden)
        layer_num += 1
        logger.debug("Hooked up hidden layer with %d neurons" % hidden_dim)
    z_mean = Dense(latent_dim, activation=None, name='z_mean',
                   weights=self.pretrain[layer_num])(hidden)
    layer_num += 1
    z_log_sigma = Dense(latent_dim, activation=None, name='z_log_sigma',
                        weights=self.pretrain[layer_num])(hidden)
    layer_num += 1
    # Reparameterization trick via self.sampling.
    z = Lambda(self.sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    hidden = latent_inputs
    for i, hidden_dim in enumerate(self.hidden_dim[::-1], 1):  # Reverse because decoder.
        hidden = Dense(hidden_dim, activation='sigmoid',
                       name='hidden_d_{}'.format(i),
                       weights=self.pretrain[layer_num])(hidden)
        layer_num += 1
        logger.debug("Hooked up hidden layer with %d neurons" % hidden_dim)
    # Output layer is NOT pretrained — initialized fresh.
    outputs = Dense(original_dim, activation='sigmoid')(hidden)
    decoder = Model(latent_inputs, outputs, name='decoder')

    # Build the DAE
    outputs = decoder(encoder(inputs)[2])
    sdae = Model(inputs, outputs, name='vae_mlp')
    # VAE objective: scaled BCE reconstruction + KL divergence.
    reconstruction_loss = binary_crossentropy(inputs, outputs) * original_dim
    kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    sdae.add_loss(vae_loss)
    sdae.compile(optimizer='adam')
    saver = ModelCheckpoint(check_path(TEMPORARY_SDAE_PATH),
                            save_weights_only=True,
                            verbose=1)
    tensorboard_config = TensorBoard(
        log_dir=check_path(TEMPORARY_SDAE_PATH))
    logger.info("Checkpoint has been saved for SDAE.")
    # train the autoencoder
    logger.warning("Pretraining started, Don't interrupt.")
    sdae.fit(data,
             epochs=epochs,
             batch_size=batch_size,
             callbacks=[saver, tensorboard_config])
    logger.info("Model has been pretrained successfully.")
self.label_length = label_length def call(self, labels, predictions): return tf.keras.backend.ctc_batch_cost(labels, predictions, self.input_length, self.label_length) tf.compat.v1.disable_eager_execution() labels = Input(name='labels', shape=(MAX_TEXT_LEN,), dtype='float32') input_length = Input(name='input_length', shape=(1,), dtype='int64') label_length = Input(name='label_length', shape=(1,), dtype='int64') inputs, outputs = OCR.conv_bgru((IMAGE_WIDTH, IMAGE_HEIGHT, 1), len(LabelCodec.ALPHABET) + 1) train_model = Model(inputs=[inputs, labels, input_length, label_length], outputs=outputs) train_model.load_weigths(MODEL_WEIGHTS_PATH) train_model.add_loss(CTCLoss(input_length, label_length)(labels, outputs)) train_model.compile(loss=None, optimizer=TrainHelper.get_optimizer(OPTIMIZER)) predict_model = Model(inputs=inputs, outputs=outputs) predict_model.summary() from tensorflow.keras.utils import plot_model from IPython.display import Image model_architecture_file = os.path.join(DOCUMENTATION_PATH, MODEL_NAME) + '.png' plot_model(predict_model, to_file=model_architecture_file, show_shapes=True) Image(filename=model_architecture_file, width=400) from sklearn.model_selection import train_test_split images, labels = loader.load(BACKGRND_HDF5, shuffle=True) X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2)
def adr_vp_feedback_frames(frames, actions, states, context_frames, Ec, Eo, A, Do, Da, L, La=None,
                           gaussian_a=False, use_seq_len=12, lstm_a_units=256, lstm_a_layers=1,
                           lstm_units=256, lstm_layers=2, learning_rate=0.0, random_window=False):
    """Build the ADR video-prediction model with frame feedback (no teacher forcing
    after the context window).

    Components: Ec context encoder, Eo residual encoder over [frame, action-only
    frame], A action encoder, Da action-only decoder, Do residual decoder,
    L recurrent predictor over residual latents, La optional gaussian action
    latent. Assumes frames is (batch, seq, w, h, c) with a static shape.

    Returns a compiled Keras model; training loss is the MSE between the
    predicted frames and frames[1:], attached via add_loss.
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len
    frame_inputs, action_state, initial_state_a, initial_state, ins = get_ins(
        frames, actions, states, use_seq_len=use_seq_len, random_window=random_window,
        gaussian=gaussian_a, a_units=lstm_a_units, a_layers=lstm_a_layers,
        units=lstm_units, layers=lstm_layers, lstm=True)

    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0), (-1, context_frames, -1, -1, -1))
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    # Keep only the last context step's encoding and skip connections.
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips_0 = slice_skips(skips_0, start=context_frames - 1, length=1)
    skips = repeat_skips(skips_0, n_frames)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)
    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state_a])  # za taken as the mean
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    x_rec_a = Da([hc_ha, skips])  # agent only prediction
    # x_err_pos = K.relu(frame_inputs - x_rec_a)
    # x_err_neg = K.relu(x_rec_a - frame_inputs)
    # xo_rec_a = K.concatenate([x_err_pos, x_err_neg], axis=-1)  # ground truth error components
    # ho, _ = Eo(xo_rec_a)

    x_pred = []
    prev_state = initial_state
    hc_t = hc_0
    ha_t, _ = tf.split(ha, [-1, 1], axis=1)  # remove last step
    _, ha_tp1 = tf.split(ha, [1, -1], axis=1)  # remove first step
    _, xa_tp1 = tf.split(x_rec_a, [1, -1], axis=1)
    x = frame_inputs
    xa = x_rec_a

    # Rollout loop: after the context window, feed back the model's own
    # prediction (x_pred_t) instead of the ground-truth frame.
    for i in range(n_frames - 1):
        # Pop the current timestep off each sequence tensor.
        xa_t, xa = tf.split(xa, [1, -1], axis=1)
        xa_pred, xa_tp1 = tf.split(xa_tp1, [1, -1], axis=1)
        x_t, x = tf.split(x, [1, -1], axis=1)
        if i >= context_frames:
            # Feedback: use the previous iteration's prediction as input.
            x_t = x_pred_t
        x_xa_t = K.concatenate([x_t, xa_t], axis=-1)
        ho_t, _ = Eo(x_xa_t)
        _ha_t, ha_t = tf.split(ha_t, [1, -1], axis=1)
        _ha_tp1, ha_tp1 = tf.split(ha_tp1, [1, -1], axis=1)
        h = tf.concat([hc_t, _ha_t, _ha_tp1, ho_t], axis=-1)
        # Predict the next residual latent with the recurrent module.
        ho_pred, state = L([h, prev_state])
        h_pred_t = tf.concat([hc_t, _ha_tp1, ho_pred], axis=-1)
        x_err_pred_t = Do([h_pred_t, skips_0])
        x_err_pred_pos = x_err_pred_t[:, :, :, :, :3]
        x_err_pred_neg = x_err_pred_t[:, :, :, :, 3:]
        # Next frame = action-only prediction + signed residual.
        x_pred_t = xa_pred + x_err_pred_pos - x_err_pred_neg
        x_pred.append(x_pred_t)
        prev_state = state

    # Obtain predicted frames
    x_pred = tf.squeeze(tf.stack(x_pred, axis=1), axis=2)
    _, x_target = tf.split(frame_inputs, [1, -1], axis=1)

    outs = [x_pred, x_pred, x_pred, x_rec_a, x_target]  # repetitions to match teacher forcing version
    model = Model(inputs=ins, outputs=outs, name='vp_model')
    rec_pred = mean_squared_error(y_pred=x_pred, y_true=x_target)
    model.add_metric(rec_pred, name='rec_pred', aggregation='mean')
    # Action-only quality is tracked but not trained on.
    rec_A = mean_squared_error(y_pred=x_rec_a, y_true=frame_inputs)
    model.add_metric(rec_A, name='rec_A', aggregation='mean')
    model.add_loss(K.mean(rec_pred))
    model.compile(optimizer=Adam(lr=learning_rate))
    return model
class ADAE(object):
    """Adversarial dual-autoencoder: a generator AE and a discriminator AE
    trained against each other with L1 reconstruction losses computed in-graph
    via Lambda layers (so the whole game fits in a single Keras model)."""

    def __init__(self, image_size=(28, 28, 1), latent_dim=100):
        # latent_dim is stored but not used here — Autoencoder.build_model
        # decides the architecture.
        self.image_size = image_size
        self.latent_dim = latent_dim
        self.input = Input(shape=image_size)
        # Build the generator
        self.generator = Autoencoder(image_size=image_size).build_model()
        # Build and compile the discriminator
        self.discriminator = Autoencoder(image_size=image_size).build_model()
        self.gx = self.generator(self.input)          # G(x)
        self.dx = self.discriminator(self.input)      # D(x)
        self.dgx = self.discriminator(self.gx)        # D(G(x))
        # Discriminator loss: reconstruct real inputs well (|x - D(x)|)
        # while reconstructing generated inputs badly (-|G(x) - D(G(x))|).
        self.d_loss = Lambda(
            lambda x: K.mean(
                K.mean(K.mean(K.abs(x[0] - x[1]), axis=1), axis=1), axis=1) - K
            .mean(K.mean(K.mean(K.abs(x[2] - x[3]), axis=1), axis=1), axis=1),
            name='d_loss')([self.input, self.dx, self.gx, self.dgx])
        # Generator loss: reconstruct x (|x - G(x)|) and fool the
        # discriminator (|G(x) - D(G(x))| small).
        self.g_loss = Lambda(lambda x: K.mean(
            K.mean(K.mean(K.abs(x[0] - x[1]), axis=1), axis=1), axis=1
        ) + K.mean(K.mean(K.mean(K.abs(x[1] - x[2]), axis=1), axis=1), axis=1),
            name='g_loss')([self.input, self.gx, self.dgx])
        self.model = Model(inputs=[self.input],
                           outputs=[self.g_loss, self.d_loss])
        self.model.summary()
        # self.generator.summary()
        # self.discriminator.summary()

    def get_anomaly_score(self):
        """
        Compute the anomaly score. Call it after training.

        Returns a model mapping input -> mean squared D(G(x)) reconstruction
        error. NOTE(review): relies on layers[1] being the generator and
        layers[2] the discriminator — verify if the graph changes.
        """
        score_out = Lambda(lambda x: K.mean(
            K.mean(K.mean((x[0] - x[1])**2, axis=1), axis=1), axis=1))([
                self.model.inputs[0],
                self.model.layers[2](
                    self.model.layers[1](self.model.inputs[0]))
            ])
        return Model(self.model.inputs[0], score_out)

    def get_generator_trained_model(self):
        """
        Get the generator to reconstruct the input. Call it after training.
        """
        return Model(self.model.inputs[0],
                     self.model.layers[1](self.model.inputs[0]))

    def get_discrinminator_trained_model(self):
        """
        Get the discrinminator to reconstruct the input. Call it after training.
        """
        return Model(
            self.model.inputs[0],
            self.model.layers[2](self.model.layers[1](self.model.inputs[0])))

    def train(self, x_train, x_test, y_train, y_test, epochs=1):
        # Alternating optimization: each epoch trains G with D frozen, then D
        # with G frozen, re-compiling with loss_weights to switch objectives.
        # x_test/y_train/y_test are currently unused (training is unsupervised
        # on x_train only).
        self.model.add_loss(K.mean(self.g_loss))
        self.model.add_metric(self.g_loss, aggregation='mean', name="g_loss")
        self.model.add_loss(K.mean(self.d_loss))
        self.model.add_metric(self.d_loss, aggregation='mean', name="d_loss")
        for epoch in range(epochs):
            print('Epoch %d/%d' % (epoch + 1, epochs))
            # Train generator only
            self.model.layers[1].trainable = True
            self.model.layers[2].trainable = False
            self.model.compile('adam', loss_weights={'g_loss': 1, 'd_loss': 0})
            print('Training on Generator')
            # NOTE(review): epochs=epoch with initial_epoch=epoch - 1 means the
            # first loop iteration passes epochs=0 / initial_epoch=-1 — confirm
            # this trains the intended single epoch per phase.
            self.model.fit(x_train,
                           batch_size=64,
                           steps_per_epoch=200,
                           epochs=epoch,
                           callbacks=[
                               LearningRateScheduler(
                                   lr_scheduler(initial_lr=1e-3,
                                                decay_factor=0.75,
                                                step_size=10,
                                                min_lr=1e-5))
                           ],
                           initial_epoch=epoch - 1)
            # Train discriminator only
            self.model.layers[1].trainable = False
            self.model.layers[2].trainable = True
            self.model.compile('adam', loss_weights={'g_loss': 0, 'd_loss': 1})
            print('Training on Discriminator')
            self.model.fit(
                x_train,
                batch_size=64,
                steps_per_epoch=200,
                epochs=epoch,
                callbacks=[
                    ModelCheckpoint(
                        './model_checkpoint/model_%d_gloss_{g_loss:.4f}_dloss_{d_loss:.4f}.h5'
                        % epoch,
                        verbose=1),
                    LearningRateScheduler(
                        lr_scheduler(initial_lr=1e-3,
                                     decay_factor=0.75,
                                     step_size=10,
                                     min_lr=1e-5))
                ],
                initial_epoch=epoch - 1)
def DIEN(dnn_feature_columns,
         history_feature_list,
         gru_type="GRU",
         use_negsampling=False,
         alpha=1.0,
         use_bn=False,
         dnn_hidden_units=(256, 128, 64),
         dnn_activation='relu',
         att_hidden_units=(64, 16),
         att_activation="dice",
         att_weight_normalization=True,
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         dnn_dropout=0,
         seed=1024,
         task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    NOTE(review): a second, older ``DIEN`` is defined elsewhere in this file;
    whichever definition executes last shadows the other at import time.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list,to indicate sequence sparse field
    :param gru_type: str,can be GRU AIGRU AUGRU AGRU
    :param use_negsampling: bool, whether or not use negtive sampling
    :param alpha: float ,weight of auxiliary_loss
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_hidden_units: list,list of positive integer , the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool.Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    # Build the Input layers and grab the (required) sequence-length input.
    features = build_input_features(dnn_feature_columns)

    user_behavior_length = features["seq_length"]

    # Partition feature columns by kind.
    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []

    # Split the variable-length columns into behavior history ("hist_*"),
    # negative-sampled history ("neg_hist_*") and everything else.
    history_feature_columns = []
    neg_history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    neg_history_fc_names = list(map(lambda x: "neg_" + x, history_fc_names))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        elif feature_name in neg_history_fc_names:
            neg_history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    # seq_mask_zero=False: masking is handled downstream by interest_evolution.
    embedding_dict = create_embedding_matrix(dnn_feature_columns,
                                             l2_reg_embedding,
                                             seed,
                                             prefix="",
                                             seq_mask_zero=False)

    # query = candidate-item embeddings; keys = behavior-sequence embeddings.
    query_emb_list = embedding_lookup(embedding_dict,
                                      features,
                                      sparse_feature_columns,
                                      return_feat_list=history_feature_list,
                                      to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict,
                                     features,
                                     history_feature_columns,
                                     return_feat_list=history_fc_names,
                                     to_list=True)
    # All sparse embeddings for the deep tower (history feats masked out).
    dnn_input_emb_list = embedding_lookup(embedding_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=history_feature_list,
                                          to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    # Pool the remaining variable-length features and append them.
    sequence_embed_dict = varlen_embedding_lookup(
        embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict,
        features,
        sparse_varlen_feature_columns,
        to_list=True)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list)

    if use_negsampling:
        neg_uiseq_embed_list = embedding_lookup(embedding_dict,
                                                features,
                                                neg_history_feature_columns,
                                                neg_history_fc_names,
                                                to_list=True)
        neg_concat_behavior = concat_func(neg_uiseq_embed_list)
    else:
        neg_concat_behavior = None

    # Interest-evolution layer; aux_loss_1 is the auxiliary loss from
    # negative sampling (only meaningful when use_neg=True).
    hist, aux_loss_1 = interest_evolution(
        keys_emb,
        query_emb,
        user_behavior_length,
        gru_type=gru_type,
        use_neg=use_negsampling,
        neg_concat_behavior=neg_concat_behavior,
        att_hidden_size=att_hidden_units,
        att_activation=att_activation,
        att_weight_normalization=att_weight_normalization,
    )

    deep_input_emb = Concatenate()([deep_input_emb, hist])

    deep_input_emb = Flatten()(deep_input_emb)

    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units,
                 dnn_activation,
                 l2_reg_dnn,
                 dnn_dropout,
                 use_bn,
                 seed=seed)(dnn_input)
    final_logit = Dense(
        1,
        use_bias=False,
        kernel_initializer=tf.keras.initializers.glorot_normal(seed))(output)
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)
    # TF1/TF2 compatibility: initialize global variables via the session.
    # Under TF2 the first form raises AttributeError, so fall back to the
    # tf.compat.v1 equivalents.
    try:
        tf.keras.backend.get_session().run(tf.global_variables_initializer())
    except AttributeError:
        tf.compat.v1.keras.backend.get_session().run(
            tf.compat.v1.global_variables_initializer())
        # Required for control-flow (RNN) layers in v1-graph mode under TF2.
        tf.compat.v1.experimental.output_all_intermediates(True)
    return model
def DIEN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
         gru_type="GRU", use_negsampling=False, alpha=1.0, use_bn=False,
         hidden_size=(200, 80), activation='sigmoid', att_hidden_size=(64, 16),
         att_activation=Dice, att_weight_normalization=True,
         l2_reg_deep=0, l2_reg_embedding=1e-5, final_activation='sigmoid',
         keep_prob=1, init_std=0.0001, seed=1024, ):
    """Instantiates the Deep Interest Evolution Network architecture.

    NOTE(review): this is the older (dict-based) variant; another ``DIEN``
    with a feature-column signature exists in this file and whichever is
    defined last shadows the other.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list,to indicate sequence sparse field (**now only support sparse feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param gru_type: str,can be GRU AIGRU AUGRU AGRU
    :param use_negsampling: bool, whether or not use negtive sampling
    :param alpha: float ,weight of auxiliary_loss
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param activation: Activation function to use in deep net
    :param att_hidden_size: list,list of positive integer , the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool.Whether normalize the attention score of local activation unit.
    :param l2_reg_deep: float. L2 regularizer strength applied to deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'``
    :param keep_prob: float in (0,1]. keep_prob used in deep net
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    # Input placeholders: per-feature sparse inputs, dense inputs, the
    # behavior-sequence inputs, and the sequence-length input.
    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    # One shared Embedding per sparse feature, reused for the candidate
    # (query), the behavior sequence (keys) and the deep tower.
    sparse_embedding_dict = {feat.name: Embedding(feat.dimension, embedding_size,
                                                  embeddings_initializer=RandomNormal(
                                                      mean=0.0, stddev=init_std, seed=seed),
                                                  embeddings_regularizer=l2(
                                                      l2_reg_embedding),
                                                  name='sparse_emb_' + str(i) + '-' + feat.name) for i, feat in
                             enumerate(feature_dim_dict["sparse"])}

    query_emb_list = [sparse_embedding_dict[feat](
        sparse_input[feat]) for feat in seq_feature_list]
    keys_emb_list = [sparse_embedding_dict[feat](
        user_behavior_input[feat]) for feat in seq_feature_list]
    deep_input_emb_list = [sparse_embedding_dict[feat.name](
        sparse_input[feat.name]) for feat in feature_dim_dict["sparse"]]

    # Concatenate along the embedding axis; skip the layer for a single tensor.
    query_emb = Concatenate()(query_emb_list) if len(
        query_emb_list) > 1 else query_emb_list[0]
    keys_emb = Concatenate()(keys_emb_list) if len(
        keys_emb_list) > 1 else keys_emb_list[0]
    deep_input_emb = Concatenate()(deep_input_emb_list) if len(
        deep_input_emb_list) > 1 else deep_input_emb_list[0]

    if use_negsampling:
        # Extra inputs carrying negative-sampled behavior sequences; they share
        # the same embeddings as the positive sequences.
        neg_user_behavior_input = {feat: Input(shape=(hist_len_max,), name='neg_seq_' + str(i) + '-' + feat)
                                   for i, feat in enumerate(seq_feature_list)}
        neg_uiseq_embed_list = [sparse_embedding_dict[feat](
            neg_user_behavior_input[feat]) for feat in seq_feature_list]
        neg_concat_behavior = Concatenate()(neg_uiseq_embed_list) if len(neg_uiseq_embed_list) > 1 else \
            neg_uiseq_embed_list[0]
    else:
        neg_concat_behavior = None

    # Interest-evolution layer; aux_loss_1 is the negative-sampling auxiliary
    # loss (only meaningful when use_neg=True).
    hist, aux_loss_1 = interest_evolution(keys_emb, query_emb, user_behavior_length, gru_type=gru_type,
                                          use_neg=use_negsampling, neg_concat_behavior=neg_concat_behavior,
                                          embedding_size=embedding_size, att_hidden_size=att_hidden_size,
                                          att_activation=att_activation,
                                          att_weight_normalization=att_weight_normalization,)

    deep_input_emb = Concatenate()([deep_input_emb, hist])

    deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = MLP(hidden_size, activation, l2_reg_deep, keep_prob,
                 use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(final_activation)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    if use_negsampling:
        model_input_list += list(neg_user_behavior_input.values())
    model_input_list += [user_behavior_length]

    model = Model(inputs=model_input_list, outputs=output)
    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)
    # TF1-only: initialize variables through the backend session.
    # NOTE(review): raises under TF2 (no tf.global_variables_initializer) —
    # the other DIEN variant in this file wraps this in try/except.
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
def adr_ao(frames, actions, states, context_frames, Ec, A, D,
           learning_rate=0.01, gaussian=False, kl_weight=None, L=None,
           use_seq_len=12, lstm_units=None, lstm_layers=None, training=True,
           reconstruct_random_frame=False, random_window=True):
    """Build and compile the ADR agent-only (action-conditioned) model.

    Encodes a context window with ``Ec``, encodes actions/states with ``A``,
    optionally samples a latent via ``L`` (gaussian mode), and decodes frames
    with ``D``.  Losses (reconstruction, context-similarity, optional KL) are
    attached with ``add_loss``/``add_metric``, so ``compile`` takes no loss.

    Returns the compiled Keras model.
    """
    bs, seq_len, w, h, c = map(int, frames.shape)
    assert seq_len >= use_seq_len

    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames, actions, states, use_seq_len=use_seq_len,
        random_window=random_window, gaussian=gaussian,
        a_units=lstm_units, a_layers=lstm_layers)

    # Context window anchored at t=0 ...
    ctx_first = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                         (-1, context_frames, -1, -1, -1))
    # ... and a randomly-positioned one (artificial data augmentation).
    ctx_start = tf.random.uniform(shape=(), minval=0,
                                  maxval=use_seq_len - context_frames + 1,
                                  dtype='int32')
    ctx_rand = tf.slice(frame_inputs, (0, ctx_start, 0, 0, 0),
                        (-1, context_frames, -1, -1, -1))

    target_frames = frame_inputs
    n_out = use_seq_len

    # ===== Build the model
    h_rand, skips_all = Ec(ctx_rand)
    h_first, _ = Ec(ctx_first)
    # Keep only the representation of the last context step.
    h_rand = tf.slice(h_rand, (0, context_frames - 1, 0), (-1, 1, -1))
    h_first = tf.slice(h_first, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_all, start=context_frames - 1, length=1)

    if reconstruct_random_frame:
        # Recover a single random frame instead of the whole sequence.
        as_dim = action_state.shape[-1]
        frame_idx = tf.random.uniform(shape=(), minval=0, maxval=use_seq_len,
                                      dtype='int32')
        action_state = tf.slice(action_state, (0, 0, 0),
                                (bs, frame_idx + 1, as_dim))
        target_frames = tf.slice(frame_inputs, (0, frame_idx, 0, 0, 0),
                                 (bs, 1, w, h, c))
        n_out = frame_idx + 1
    else:
        skips = repeat_skips(skips, use_seq_len)

    h_act = A(action_state)
    h_ctx_tiled = RepeatVector(n_out)(tf.squeeze(h_rand, axis=1))
    dec_in = K.concatenate([h_ctx_tiled, h_act], axis=-1)

    if gaussian:
        z, mu, logvar, state = L([dec_in, initial_state])
        # At inference time use the posterior mean instead of a sample.
        if training is False:
            z = mu
        dec_in = K.concatenate([h_ctx_tiled, h_act, z], axis=-1)

    if reconstruct_random_frame:
        # Only the last timestep feeds the decoder (and the KL terms).
        _, dec_in = tf.split(dec_in, [-1, 1], axis=1)
        if gaussian:
            _, mu = tf.split(mu, [-1, 1], axis=1)
            _, logvar = tf.split(logvar, [-1, 1], axis=1)

    x_rec = D([dec_in, skips])

    loss_rec = mean_squared_error(target_frames, x_rec)
    # Both context windows should encode to the same representation.
    loss_sim = mean_squared_error(h_rand, h_first)

    if gaussian:
        ED = Model(inputs=ins, outputs=[x_rec, target_frames, mu, logvar])
    else:
        ED = Model(inputs=ins, outputs=[x_rec, target_frames])

    ED.add_metric(loss_rec, name='rec_loss', aggregation='mean')
    ED.add_metric(loss_sim, name='sim_loss', aggregation='mean')

    if gaussian:
        loss_kl = kl_unit_normal(mu, logvar)
        ED.add_metric(loss_kl, name='kl_loss', aggregation='mean')
        ED.add_loss(K.mean(loss_rec) + K.mean(loss_sim) +
                    kl_weight * K.mean(loss_kl))
    else:
        ED.add_loss(K.mean(loss_rec) + K.mean(loss_sim))

    ED.compile(optimizer=Adam(lr=learning_rate))
    return ED
def adr_vp_teacher_forcing(frames, actions, states, context_frames, Ec, Eo, A,
                           Do, Da, L, La=None, gaussian_a=False,
                           use_seq_len=12, lstm_a_units=256, lstm_a_layers=1,
                           lstm_units=256, lstm_layers=2, learning_rate=0.001,
                           random_window=False):
    """Build and compile the ADR video-prediction model with teacher forcing.

    Pipeline: reconstruct agent-only frames with Da, form positive/negative
    error maps w.r.t. the ground truth, encode them with Eo, run the LSTM L on
    ground-truth (teacher-forced) features to predict the next-step error
    representation, and decode current/predicted error maps with Do.
    Timestep index comments like "[0 to 18]" / "[1 to 19]" assume
    use_seq_len=20 minus one step at either end; the general contract is
    t-aligned vs (t+1)-aligned splits of an n_frames-long sequence.

    Returns the compiled Keras model (loss attached via add_loss).
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len

    frame_inputs, action_state, initial_state_a, initial_state, ins = get_ins(
        frames, actions, states, use_seq_len=use_seq_len,
        random_window=random_window, gaussian=gaussian_a,
        a_units=lstm_a_units, a_layers=lstm_a_layers, units=lstm_units,
        layers=lstm_layers, lstm=True)

    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))

    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    # Keep only the last context step's representation and skip connections.
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips_0 = slice_skips(skips_0, start=context_frames - 1, length=1)
    skips = repeat_skips(skips_0, n_frames)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state_a])  # za taken as the mean
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    x_rec_a = Da([hc_ha, skips])  # agent only prediction

    # Positive/negative parts of the ground-truth reconstruction error.
    x_err_pos = K.relu(frame_inputs - x_rec_a)
    x_err_neg = K.relu(x_rec_a - frame_inputs)

    # xo_rec_a = K.concatenate([frame_inputs, x_rec_a], axis=-1)  # --> Here the action only image is not needed
    xo_rec_a = K.concatenate([x_err_pos, x_err_neg],
                             axis=-1)  # ground truth error components

    # Split helpers: drop the first / last timestep of a sequence tensor.
    remove_first_step = Lambda(
        lambda _x: tf.split(_x, [1, -1], axis=1))  # new operations
    remove_last_step = Lambda(lambda _x: tf.split(_x, [-1, 1], axis=1))

    ho, _ = Eo(xo_rec_a)

    hc = RepeatVector(n_frames - 1)(K.squeeze(hc_0, axis=1))
    skips = repeat_skips(skips_0, ntimes=n_frames - 1)

    ha_t, _ = remove_last_step(ha)  # [0 to 18]
    _, ha_tp1 = remove_first_step(ha)  # [1 to 19]
    ho_t, _ = remove_last_step(ho)  # [0 to 18]

    # Teacher forcing: the LSTM sees ground-truth ho_t (not its own output).
    h = tf.concat([hc, ha_t, ha_tp1, ho_t], axis=-1)  # [0 to 18]
    ho_pred, _ = L([h, initial_state])  # [1 to 19]

    _, ho_tp1 = remove_first_step(ho)  # [1 to 19] Target for LSTM outputs

    x_rec_a_t, _ = remove_last_step(x_rec_a)  # [0 to 18] Used to obtain x_curr
    _, x_rec_a_tp1 = remove_first_step(
        x_rec_a)  # [1 to 19] Used to obtain x_pred
    _, x_target_pred = remove_first_step(
        frame_inputs)  # Target for Do pred reconstruction
    _, x_err_pos_target = remove_first_step(
        x_err_pos)  # Target for Do pred reconstruction
    _, x_err_neg_target = remove_first_step(
        x_err_neg)  # Target for Do pred reconstruction

    # reconstruct current step
    h = tf.concat([hc, ha_t, ho_t], axis=-1)
    x_err_curr = Do([h, skips])
    x_target_curr, _ = remove_last_step(
        frame_inputs)  # [0 to 18] Target for x_curr
    # Decoded error maps carry pos/neg components in the channel dimension.
    x_err_curr_pos = x_err_curr[:, :, :, :, :3]
    x_err_curr_neg = x_err_curr[:, :, :, :, 3:]
    x_curr = x_rec_a_t + x_err_curr_pos - x_err_curr_neg

    # predict one step ahead
    h = tf.concat([hc, ha_tp1, ho_pred], axis=-1)
    x_err_pred = Do([h, skips])
    x_err_pred_pos = x_err_pred[:, :, :, :, :3]
    x_err_pred_neg = x_err_pred[:, :, :, :, 3:]
    x_pred = x_rec_a_tp1 + x_err_pred_pos - x_err_pred_neg

    model = Model(inputs=ins,
                  outputs=[ho_pred, x_curr, x_pred, x_rec_a, x_target_pred],
                  name='vp_model')

    # Tracked metrics; note only a subset contributes to the training loss.
    ho_mse = mean_squared_error(y_pred=ho_pred, y_true=ho_tp1)
    model.add_metric(K.mean(ho_mse), name='ho_mse', aggregation='mean')

    rec_curr = mean_squared_error(y_pred=x_curr, y_true=x_target_curr)
    model.add_metric(rec_curr, name='rec_curr', aggregation='mean')

    rec_pred = mean_squared_error(y_pred=x_pred, y_true=x_target_pred)
    model.add_metric(rec_pred, name='rec_pred', aggregation='mean')

    rec_pos = mean_squared_error(y_pred=x_err_pred_pos,
                                 y_true=x_err_pos_target)
    rec_neg = mean_squared_error(y_pred=x_err_pred_neg,
                                 y_true=x_err_neg_target)

    rec_A = mean_squared_error(y_pred=x_rec_a, y_true=frame_inputs)
    model.add_metric(rec_A, name='rec_A', aggregation='mean')

    # why did I have rec_curr??
    # model.add_loss(0.5*K.mean(ho_mse) + 0.125*K.mean(rec_curr) + 0.125*K.mean(rec_pred)
    #                + 0.125*K.mean(rec_pos) + 0.125*K.mean(rec_neg))
    # model.add_loss(0.5*K.mean(ho_mse) + 0.5/3*(K.mean(rec_pred)) + K.mean(rec_pos) + K.mean(rec_neg))
    # NOTE(review): ho_mse, rec_curr and rec_A are metrics only; the training
    # loss combines the one-step prediction terms (see commented alternatives
    # above for earlier weightings).
    model.add_loss(K.mean(rec_pred) + K.mean(rec_pos) + K.mean(rec_neg))
    model.compile(Adam(lr=learning_rate))

    return model