def get_encoder(input_idx, input_one_hot_embeddings, nfilter, z_size, intermediate_dim, one_hot_embeddings):
    """Build the LSTM-based variational encoder and the sub-model it wraps.

    A 300-unit LSTM that processes the sequence backwards reads
    ``one_hot_embeddings``. Two Dense heads of width ``z_size`` (named 'mu'
    and 'sigma') feed a ``Sampling`` layer. Those pieces are packaged as
    ``discr_encoder``, which is then applied to ``input_one_hot_embeddings``
    to obtain the outputs of the index-level ``encoder``.

    Args:
        input_idx: input tensor of the returned ``encoder`` model.
        input_one_hot_embeddings: tensor the ``discr_encoder`` is applied to.
        nfilter: not used by this variant.
        z_size: width of the 'mu' / 'sigma' heads, also passed to ``Sampling``.
        intermediate_dim: not used by this variant.
        one_hot_embeddings: tensor fed to the LSTM; also the input of
            ``discr_encoder``.

    Returns:
        Tuple ``(encoder, discr_encoder)``; both models output
        ``[sample, mean, log_sigma]``.
    """
    recurrent_summary = LSTM(
        units=300,
        dropout=0.2,
        recurrent_dropout=0.2,
        name='encoding_lstm',
        go_backwards=True,
        implementation=2,
    )(one_hot_embeddings)

    # Two parallel heads on top of the LSTM summary.
    latent_mean = Dense(z_size, name='mu')(recurrent_summary)
    latent_log_sigma = Dense(z_size, name='sigma')(recurrent_summary)

    sampler = Sampling(z_size)
    latent_sample = sampler([latent_mean, latent_log_sigma])

    # Embedding-level model: embeddings in, (sample, mean, log_sigma) out.
    discr_encoder = Model(
        inputs=one_hot_embeddings,
        outputs=[latent_sample, latent_mean, latent_log_sigma],
        name='discr_encoder',
    )

    # Re-apply the sub-model so the outer model starts from ``input_idx``.
    encoder_outputs = discr_encoder(input_one_hot_embeddings)
    z_p, z_mean, z_sigma = encoder_outputs
    encoder = Model(inputs=input_idx, outputs=[z_p, z_mean, z_sigma], name='encoder')

    return encoder, discr_encoder
def get_encoder(config_data, input_idx, input_one_hot_embeddings, nfilter, name, z_size):
    """Build a two-stage convolutional encoder that ends in a ``Sampling`` layer.

    Two Conv1D -> BatchNormalization(scale=False) -> PReLU stages (stride 2,
    'same' padding, ``nfilter`` then ``2 * nfilter`` filters) are applied to
    ``input_one_hot_embeddings``. The result is flattened, projected to
    ``config_data['intermediate_dim']`` units, then to ``2 * z_size`` values
    that are handed to ``Sampling(z_size)``.

    Args:
        config_data: mapping; only ``'intermediate_dim'`` is read here.
        input_idx: input tensor of the returned model.
        input_one_hot_embeddings: tensor the convolutions are applied to.
        nfilter: filter count of the first convolution (doubled for the second).
        name: suffix used in the model name ``'encoder_<name>'``.
        z_size: latent size passed to ``Sampling``.

    Returns:
        Tuple ``(encoder, sampling_object)``: the Keras model and the
        ``Sampling`` layer instance used inside it.
    """
    intermediate_dim = config_data['intermediate_dim']

    # Each stride-2 stage shortens the sequence axis by about a factor of two.
    features = input_one_hot_embeddings
    for stage_filters in (nfilter, 2 * nfilter):
        features = Conv1D(filters=stage_filters, kernel_size=3, strides=2, padding='same')(features)
        features = BatchNormalization(scale=False)(features)
        features = PReLU()(features)

    flat_features = Flatten()(features)

    bottleneck = Dense(intermediate_dim, name='intermediate_encoding')(flat_features)
    # 2 * z_size values -- presumably mean and log-sigma packed together for
    # Sampling to split; confirm against the Sampling implementation.
    packed_latent_params = Dense(z_size * 2)(bottleneck)

    sampling_object = Sampling(z_size)
    latent_sample = sampling_object(packed_latent_params)

    encoder = Model(inputs=input_idx, outputs=latent_sample, name='encoder_{}'.format(name))
    return encoder, sampling_object
def get_encoder(inputs, name_one_hot_embeddings, near_one_hot_embeddings, nfilter, z_size, intermediate_dim):
    """Build a dense variational encoder over concatenated attribute inputs.

    The first eight entries of ``inputs`` are concatenated (in the order
    name, near, eat_type, price_range, customer_feedback, food, area,
    family), projected to ``intermediate_dim`` units, and mapped to 'mu' and
    'sigma' heads of width ``z_size`` that feed a ``Sampling`` layer. The
    model summary is printed as a side effect.

    Args:
        inputs: sequence of exactly nine tensors; the last one is ignored
            and excluded from the model inputs.
        name_one_hot_embeddings: not used (the conv path that consumed it is
            disabled).
        near_one_hot_embeddings: not used (same reason).
        nfilter: not used (same reason).
        z_size: width of the 'mu' / 'sigma' heads, also passed to ``Sampling``.
        intermediate_dim: width of the 'intermediate_encoding' layer.

    Returns:
        Tuple ``(encoder, [hidden_mean, hidden_log_sigma], full_concat)``.
    """
    # Unpacking also enforces that exactly nine inputs were supplied.
    (name_idx, eat_type_idx, price_range_idx, customer_feedback_idx,
     near_idx, food_idx, area_idx, family_idx, _) = inputs

    # Disabled alternative kept for reference: a conv stack over the name and
    # near embeddings followed by 16-unit relu Dense layers.
    # name_conv = get_conv_stack(name_one_hot_embeddings, nfilter)
    # near_conv = get_conv_stack(near_one_hot_embeddings, nfilter)
    # name_hidden = Dense(units=16, activation='relu')(name_conv)
    # near_hidden = Dense(units=16, activation='relu')(near_conv)

    # Note the concat order differs from the unpacking order: near comes second.
    attribute_tensors = [
        name_idx,
        near_idx,
        eat_type_idx,
        price_range_idx,
        customer_feedback_idx,
        food_idx,
        area_idx,
        family_idx,
    ]
    full_concat = concatenate(inputs=attribute_tensors)

    bottleneck = Dense(intermediate_dim, name='intermediate_encoding')(full_concat)
    latent_mean = Dense(z_size, name='mu')(bottleneck)
    latent_log_sigma = Dense(z_size, name='sigma')(bottleneck)

    sampler = Sampling(z_size)
    latent_sample = sampler([latent_mean, latent_log_sigma])

    # The ninth input is deliberately left out of the model inputs.
    encoder = Model(
        inputs=inputs[:-1],
        outputs=[latent_sample, latent_mean, latent_log_sigma],
    )
    encoder.summary()

    return encoder, [latent_mean, latent_log_sigma], full_concat
def get_encoder(input_idx, input_one_hot_embeddings, nfilter, z_size, intermediate_dim):
    """Build a three-stage convolutional variational encoder.

    Three Conv1D -> BatchNormalization -> relu stages (kernel 3, stride 2,
    'same' padding; ``nfilter``, ``2 * nfilter`` and ``2 * nfilter`` filters)
    are applied to ``input_one_hot_embeddings``. The result is flattened,
    projected to ``intermediate_dim`` units and mapped to 'mu' and 'sigma'
    heads of width ``z_size`` that feed a ``Sampling`` layer.

    Args:
        input_idx: input tensor of the returned model.
        input_one_hot_embeddings: tensor the convolutions are applied to.
        nfilter: filter count of the first convolution.
        z_size: width of the 'mu' / 'sigma' heads, also passed to ``Sampling``.
        intermediate_dim: width of the 'intermediate_encoding' layer.

    Returns:
        Keras model mapping ``input_idx`` to ``[sample, mean, log_sigma]``.
    """
    # Every stride-2 stage shortens the sequence axis by about a factor of two.
    features = input_one_hot_embeddings
    for stage_filters in (nfilter, 2 * nfilter, 2 * nfilter):
        features = Conv1D(
            filters=stage_filters,
            kernel_size=3,
            strides=2,
            padding='same',
        )(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)

    flat_features = Flatten()(features)

    bottleneck = Dense(intermediate_dim, name='intermediate_encoding')(flat_features)
    latent_mean = Dense(z_size, name='mu')(bottleneck)
    latent_log_sigma = Dense(z_size, name='sigma')(bottleneck)

    sampler = Sampling(z_size)
    latent_sample = sampler([latent_mean, latent_log_sigma])

    return Model(
        inputs=input_idx,
        outputs=[latent_sample, latent_mean, latent_log_sigma],
    )
def vae_model(config_data, vocab, step):
    """Build the convolutional one-hot VAE and return its train/test models.

    Encoder: frozen identity ("one-hot") embedding -> two stride-2 Conv1D
    stages -> Flatten -> Dense(intermediate_dim, relu) -> Dense(2 * z_size)
    -> ``Sampling``. Decoder: Dense -> Dense -> Reshape -> two stride-2
    Conv2DTranspose stages -> Reshape -> per-position softmax over
    ``nclasses``.

    The training model outputs the loss tensor itself (binary cross-entropy
    plus an annealed KL term); the test model outputs the argmax class index
    per position.

    Args:
        config_data: mapping providing ``'z_size'``,
            ``'max_sentence_length'`` and ``'intermediate_dim'``.
        vocab: sized collection; ``len(vocab) + 2`` classes are modelled.
        step: value used in the KL annealing schedule -- presumably a backend
            variable holding the current training step; confirm with caller.

    Returns:
        Tuple ``(train_model, test_model)``.

    Raises:
        ValueError: if ``max_sentence_length`` is not a multiple of 4. The
            decoder rebuilds the sequence from a quarter-length tensor, so
            any other length cannot pass the Reshape layers.
    """
    z_size = config_data['z_size']
    sample_size = config_data['max_sentence_length']
    # Two extra classes on top of the vocabulary; per the original note the
    # last available index is reserved as start character.
    nclasses = len(vocab) + 2
    intermediate_dim = config_data['intermediate_dim']
    nfilter = 128
    out_size = 200
    eps = 0.001
    # KL weight ramps linearly from eps to 1 between these two step values.
    anneal_start = 1000.0
    anneal_end = anneal_start + 7000.0

    # Fail fast with a readable message instead of a size-mismatch error
    # raised from inside the decoder's Reshape layers.
    if sample_size % 4 != 0:
        raise ValueError(
            'max_sentence_length must be a multiple of 4 (got {}): the decoder '
            'upsamples a quarter-length tensor with two stride-2 transposed '
            'convolutions'.format(sample_size)
        )
    quarter_length = int(sample_size / 4)

    # == == == == == =
    # Define Encoder
    # == == == == == =
    input_idx = Input(batch_shape=(None, sample_size), dtype='float32', name='character_input')
    # Identity weights turn the embedding lookup into a one-hot encoding:
    # output is (batch_size, sample_size, nclasses).
    one_hot_weights = np.identity(nclasses)
    one_hot_embeddings = Embedding(
        input_length=sample_size,
        input_dim=nclasses,
        output_dim=nclasses,
        weights=[one_hot_weights],
        trainable=False,
        name='one_hot_embeddings',
    )
    input_one_hot_embeddings = one_hot_embeddings(input_idx)

    # Each stride-2 convolution shortens the sequence axis by a factor of two.
    conv1 = Conv1D(filters=nfilter, kernel_size=3, strides=2, padding='same')(input_one_hot_embeddings)
    bn1 = BatchNormalization()(conv1)
    relu1 = Activation(activation='relu')(bn1)
    conv2 = Conv1D(filters=2 * nfilter, kernel_size=3, strides=2, padding='same')(relu1)
    bn2 = BatchNormalization()(conv2)
    relu2 = Activation(activation='relu')(bn2)
    flatten = Flatten()(relu2)

    hidden_intermediate_enc = Dense(intermediate_dim, activation='relu', name='intermediate_encoding')(flatten)
    # 2 * z_size values -- presumably mean and log-sigma packed together for
    # Sampling to split; confirm against the Sampling implementation.
    hidden_zvalues = Dense(z_size * 2)(hidden_intermediate_enc)
    sampling_object = Sampling(z_size)
    sampling = sampling_object(hidden_zvalues)

    # == == == == == =
    # Define Decoder
    # == == == == == =
    hidden_intermediate_dec = Dense(intermediate_dim, name='intermediate_decoding')(sampling)
    # Mirror of the encoder's flattened conv output: quarter length, 2 * nfilter channels.
    decoder_upsample = Dense(2 * nfilter * quarter_length)(hidden_intermediate_dec)
    if K.image_data_format() == 'channels_first':
        # NOTE(review): in this layout the final Reshape below reinterprets a
        # channel-major tensor as (sample_size, out_size) without a transpose
        # -- confirm this path is intended/used.
        output_shape = (2 * nfilter, quarter_length, 1)
    else:
        output_shape = (quarter_length, 1, 2 * nfilter)
    reshape = Reshape(output_shape)(decoder_upsample)
    # 2D transposed convolutions with a width-1 second axis act as 1D deconvolutions.
    deconv1 = Conv2DTranspose(filters=nfilter, kernel_size=(3, 1), strides=(2, 1), padding='same')(reshape)
    bn3 = BatchNormalization()(deconv1)
    relu3 = Activation(activation='relu')(bn3)
    deconv2 = Conv2DTranspose(filters=out_size, kernel_size=(3, 1), strides=(2, 1), padding='same')(relu3)
    bn4 = BatchNormalization()(deconv2)
    relu4 = Activation(activation='relu')(bn4)
    reshape = Reshape((sample_size, out_size))(relu4)
    softmax = Dense(nclasses, activation='softmax')(reshape)

    def argmax_fun(softmax_output):
        """Return the most probable class index along axis 2."""
        return K.argmax(softmax_output, axis=2)

    def vae_loss(args):
        """Combine reconstruction cross-entropy with the annealed KL term."""
        x, x_decoded_mean = args
        # NOTE: binary_crossentropy expects a batch_size by dim
        # for x and x_decoded_mean, so we MUST flatten these!
        x = K.flatten(K.clip(x, 1e-5, 1 - 1e-5))
        x_decoded_mean = K.flatten(x_decoded_mean)
        xent_loss = nclasses * sample_size * binary_crossentropy(x, x_decoded_mean)
        # mu / log_sigma are read from the Sampling layer instance.
        kl_loss = -0.5 * K.mean(
            1 + sampling_object.log_sigma
            - K.square(sampling_object.mu)
            - K.exp(sampling_object.log_sigma),
            axis=-1,
        )
        kld_weight = K.clip((step - anneal_start) / (anneal_end - anneal_start), 0, 1 - eps) + eps
        return xent_loss + kl_loss * kld_weight

    loss = Lambda(vae_loss, output_shape=(1, ))([input_one_hot_embeddings, softmax])
    argmax = Lambda(argmax_fun, output_shape=(sample_size, ))(softmax)

    train_model = Model(inputs=[input_idx], outputs=[loss])
    test_model = Model(inputs=[input_idx], outputs=[argmax])
    return train_model, test_model
def vae_model(config_data, vocab, step):
    """Build the convolutional VAE with separate input and output lengths.

    Encoder: frozen identity ("one-hot") embedding -> two stride-2 Conv1D
    stages -> Flatten -> Dense(intermediate_dim, relu) -> 'mu' / 'sigma'
    heads -> ``Sampling``. Decoder: Dense -> Dense -> Reshape -> two stride-2
    Conv2DTranspose stages -> Reshape -> per-position softmax over
    ``nclasses``.

    The training model takes the input indices and the target indices and
    outputs two loss tensors (summed categorical cross-entropy and annealed
    KL divergence); the test model outputs the argmax class per position.

    Args:
        config_data: mapping providing ``'z_size'``, ``'max_input_length'``,
            ``'max_output_length'`` and ``'intermediate_dim'``.
        vocab: sized collection; ``len(vocab) + 2`` classes are modelled.
        step: value used in the KL annealing schedule -- presumably a backend
            variable holding the current training step; confirm with caller.

    Returns:
        Tuple ``(train_model, test_model)``.

    Raises:
        ValueError: if ``max_output_length`` is not a multiple of 4. The
            decoder rebuilds the sequence from a quarter-length tensor, so
            any other length cannot pass the Reshape layers.
    """
    z_size = config_data['z_size']
    sample_in_size = config_data['max_input_length']
    sample_out_size = config_data['max_output_length']
    # Two extra classes on top of the vocabulary; per the original note the
    # last available index is reserved as start character.
    nclasses = len(vocab) + 2
    intermediate_dim = config_data['intermediate_dim']
    nfilter = 128
    out_size = 200
    eps = 0.001
    # KL weight ramps linearly from eps to 1 between these two step values.
    anneal_start = 1000.0
    anneal_end = anneal_start + 7000.0

    # Fail fast with a readable message instead of a size-mismatch error
    # raised from inside the decoder's Reshape layers.
    if sample_out_size % 4 != 0:
        raise ValueError(
            'max_output_length must be a multiple of 4 (got {}): the decoder '
            'upsamples a quarter-length tensor with two stride-2 transposed '
            'convolutions'.format(sample_out_size)
        )
    quarter_length = int(sample_out_size / 4)

    # == == == == == =
    # Define Encoder
    # == == == == == =
    input_idx = Input(batch_shape=(None, sample_in_size), dtype='float32', name='character_input')
    output_idx = Input(batch_shape=(None, sample_out_size), dtype='int32', name='character_output')
    # Identity weights turn the embedding lookup into a one-hot encoding:
    # output is (batch_size, sample_in_size, nclasses).
    one_hot_weights = np.identity(nclasses)
    one_hot_embeddings = Embedding(
        input_length=sample_in_size,
        input_dim=nclasses,
        output_dim=nclasses,
        weights=[one_hot_weights],
        trainable=False,
        name='one_hot_embeddings',
    )
    input_one_hot_embeddings = one_hot_embeddings(input_idx)

    # Each stride-2 convolution shortens the sequence axis by a factor of two.
    conv1 = Conv1D(filters=nfilter, kernel_size=3, strides=2, padding='same')(input_one_hot_embeddings)
    bn1 = BatchNormalization()(conv1)
    relu1 = Activation(activation='relu')(bn1)
    conv2 = Conv1D(filters=2 * nfilter, kernel_size=3, strides=2, padding='same')(relu1)
    bn2 = BatchNormalization()(conv2)
    relu2 = Activation(activation='relu')(bn2)
    flatten = Flatten()(relu2)

    hidden_intermediate_enc = Dense(intermediate_dim, activation='relu', name='intermediate_encoding')(flatten)
    hidden_mean = Dense(z_size, name='mu')(hidden_intermediate_enc)
    hidden_log_sigma = Dense(z_size, name='sigma')(hidden_intermediate_enc)
    sampling_object = Sampling(z_size)
    sampling = sampling_object([hidden_mean, hidden_log_sigma])

    # == == == == == =
    # Define Decoder
    # == == == == == =
    hidden_intermediate_dec = Dense(intermediate_dim, name='intermediate_decoding')(sampling)
    # Quarter of the output length, 2 * nfilter channels.
    decoder_upsample = Dense(2 * nfilter * quarter_length)(hidden_intermediate_dec)
    if K.image_data_format() == 'channels_first':
        # NOTE(review): in this layout the final Reshape below reinterprets a
        # channel-major tensor as (sample_out_size, out_size) without a
        # transpose -- confirm this path is intended/used.
        output_shape = (2 * nfilter, quarter_length, 1)
    else:
        output_shape = (quarter_length, 1, 2 * nfilter)
    reshape = Reshape(output_shape)(decoder_upsample)
    # 2D transposed convolutions with a width-1 second axis act as 1D deconvolutions.
    deconv1 = Conv2DTranspose(filters=nfilter, kernel_size=(3, 1), strides=(2, 1), padding='same')(reshape)
    bn3 = BatchNormalization()(deconv1)
    relu3 = Activation(activation='relu')(bn3)
    deconv2 = Conv2DTranspose(filters=out_size, kernel_size=(3, 1), strides=(2, 1), padding='same')(relu3)
    bn4 = BatchNormalization()(deconv2)
    relu4 = Activation(activation='relu')(bn4)
    reshape = Reshape((sample_out_size, out_size))(relu4)
    softmax = Dense(nclasses, activation='softmax')(reshape)

    def argmax_fun(softmax_output):
        """Return the most probable class index along axis 2."""
        return K.argmax(softmax_output, axis=2)

    def vae_loss(args):
        """Sum the per-position categorical cross-entropy over each sentence."""
        x_truth, x_decoded_final = args
        # Flatten to (batch * positions,) targets and (batch * positions, classes)
        # predictions, then fold the result back to one row per sentence.
        x_truth_flatten = K.flatten(x_truth)
        x_decoded_flat = K.reshape(x_decoded_final, shape=(-1, K.shape(x_decoded_final)[-1]))
        # Calls Theano directly rather than going through the Keras backend.
        cross_ent = T.nnet.categorical_crossentropy(x_decoded_flat, x_truth_flatten)
        cross_ent = K.reshape(cross_ent, shape=(-1, K.shape(x_truth)[1]))
        sum_over_sentences = K.sum(cross_ent, axis=1)
        return sum_over_sentences

    def vae_kld_loss(args):
        """Return the KL divergence term scaled by the annealing weight."""
        mu, log_sigma = args
        kl_loss = -0.5 * K.sum(1 + log_sigma - K.square(mu) - K.exp(log_sigma), axis=-1)
        kld_weight = K.clip((step - anneal_start) / (anneal_end - anneal_start), 0, 1 - eps) + eps
        return kl_loss * kld_weight

    loss = Lambda(vae_loss, output_shape=(1, ))([output_idx, softmax])
    kld_loss = Lambda(vae_kld_loss, output_shape=(1, ), name='kld_loss')([hidden_mean, hidden_log_sigma])
    argmax = Lambda(argmax_fun, output_shape=(sample_out_size, ))(softmax)

    train_model = Model(inputs=[input_idx, output_idx], outputs=[loss, kld_loss])
    test_model = Model(inputs=[input_idx], outputs=[argmax])
    return train_model, test_model
def vae_model(config_data, vocab, step):
    """Build the convolutional VAE that reconstructs word embeddings.

    Encoder: frozen pretrained embedding -> two stride-2 Conv1D stages ->
    Flatten -> Dense(intermediate_dim, relu) -> Dense(2 * z_size) ->
    ``Sampling``. Decoder: Dense -> Dense -> Reshape -> two stride-2
    Conv2DTranspose stages -> Reshape -> three linear Dense layers of width
    ``out_size``.

    The training model outputs two loss tensors ('main_loss': summed cosine
    distance to the input embeddings, 'kld_loss': annealed KL divergence);
    the test model outputs the result of ``PredictionLayer``.

    Args:
        config_data: mapping providing ``'z_size'``,
            ``'max_sentence_length'``, ``'intermediate_dim'`` and
            ``'vocab_path'`` (directory containing ``embedding_matrix.npy``).
        vocab: not used; the class count comes from the embedding matrix.
        step: value used in the KL annealing schedule -- presumably a backend
            variable holding the current training step; confirm with caller.

    Returns:
        Tuple ``(train_model, test_model)``.

    Raises:
        ValueError: if ``max_sentence_length`` is not a multiple of 4. The
            decoder rebuilds the sequence from a quarter-length tensor, so
            any other length cannot pass the Reshape layers.
    """
    z_size = config_data['z_size']
    sample_size = config_data['max_sentence_length']
    intermediate_dim = config_data['intermediate_dim']
    nfilter = 128
    out_size = 200
    eps = 0.001
    # KL weight ramps linearly from eps to 1 between these two step values.
    anneal_start = 0.0
    anneal_end = anneal_start + 7000.0

    # Fail fast with a readable message instead of a size-mismatch error
    # raised from inside the decoder's Reshape layers.
    if sample_size % 4 != 0:
        raise ValueError(
            'max_sentence_length must be a multiple of 4 (got {}): the decoder '
            'upsamples a quarter-length tensor with two stride-2 transposed '
            'convolutions'.format(sample_size)
        )
    quarter_length = int(sample_size / 4)

    embedding_path = join(config_data['vocab_path'], 'embedding_matrix.npy')
    # Close the file deterministically instead of leaking the handle.
    with open(embedding_path, 'rb') as embedding_file:
        embedding_matrix = np.load(embedding_file)
    nclasses = embedding_matrix.shape[0]
    emb_dim = embedding_matrix.shape[1]

    # == == == == == =
    # Define Encoder
    # == == == == == =
    input_idx = Input(batch_shape=(None, sample_size), dtype='int32', name='character_input')
    # Output is (batch_size, sample_size, emb_dim).
    word_embedding_layer = Embedding(
        input_length=sample_size,
        input_dim=nclasses,
        output_dim=emb_dim,
        weights=[embedding_matrix],
        trainable=False,
        name='word_embeddings',
    )
    input_word_embeddings = word_embedding_layer(input_idx)

    # Each stride-2 convolution shortens the sequence axis by a factor of two.
    conv1 = Conv1D(filters=nfilter, kernel_size=3, strides=2, padding='same')(input_word_embeddings)
    bn1 = BatchNormalization()(conv1)
    relu1 = Activation(activation='relu')(bn1)
    conv2 = Conv1D(filters=2 * nfilter, kernel_size=3, strides=2, padding='same')(relu1)
    bn2 = BatchNormalization()(conv2)
    relu2 = Activation(activation='relu')(bn2)
    flatten = Flatten()(relu2)

    hidden_intermediate_enc = Dense(intermediate_dim, activation='relu', name='intermediate_encoding')(flatten)
    # 2 * z_size values -- presumably mean and log-sigma packed together for
    # Sampling to split; confirm against the Sampling implementation.
    hidden_zvalues = Dense(z_size * 2)(hidden_intermediate_enc)
    sampling_object = Sampling(z_size)
    sampling = sampling_object(hidden_zvalues)

    # == == == == == =
    # Define Decoder
    # == == == == == =
    hidden_intermediate_dec = Dense(intermediate_dim, name='intermediate_decoding')(sampling)
    # Mirror of the encoder's flattened conv output: quarter length, 2 * nfilter channels.
    decoder_upsample = Dense(2 * nfilter * quarter_length)(hidden_intermediate_dec)
    if K.image_data_format() == 'channels_first':
        # NOTE(review): in this layout the final Reshape below reinterprets a
        # channel-major tensor as (sample_size, out_size) without a transpose
        # -- confirm this path is intended/used.
        output_shape = (2 * nfilter, quarter_length, 1)
    else:
        output_shape = (quarter_length, 1, 2 * nfilter)
    reshape = Reshape(output_shape)(decoder_upsample)
    # 2D transposed convolutions with a width-1 second axis act as 1D deconvolutions.
    deconv1 = Conv2DTranspose(filters=nfilter, kernel_size=(3, 1), strides=(2, 1), padding='same')(reshape)
    bn3 = BatchNormalization()(deconv1)
    relu3 = Activation(activation='relu')(bn3)
    deconv2 = Conv2DTranspose(filters=out_size, kernel_size=(3, 1), strides=(2, 1), padding='same')(relu3)
    bn4 = BatchNormalization()(deconv2)
    relu4 = Activation(activation='relu')(bn4)
    reshape = Reshape((sample_size, out_size))(relu4)

    hidden = Dense(out_size, activation='linear')(reshape)
    hidden = Dense(out_size, activation='linear')(hidden)
    hidden = Dense(out_size, activation='linear')(hidden)

    def vae_cosine_distance_loss(args):
        """Sum, per sentence, the cosine distance between target and output."""
        # NOTE(review): the element-wise product requires emb_dim == out_size
        # (200) -- confirm the embedding matrix matches.
        x_truth, x_decoded_final = args
        # Normalize over the embedding dimension.
        xt_mag = K.l2_normalize(x_truth, axis=2)
        xp_mag = K.l2_normalize(x_decoded_final, axis=2)
        elem_mult = xt_mag * xp_mag
        cosine_sim = K.sum(elem_mult, axis=2)
        cosine_distance = 1 - cosine_sim
        sum_over_sentences = K.sum(cosine_distance, axis=1)
        return sum_over_sentences

    def vae_mse_loss(args):
        """Alternative reconstruction loss (summed squared error); not wired in."""
        x_truth, x_decoded_final = args
        difference = x_truth - x_decoded_final
        squared_difference = K.square(difference)
        sums = K.sum(K.sum(squared_difference, axis=2), axis=1)
        return sums

    def vae_kld_loss(args):
        """Return the annealed KL term; ``args`` is ignored.

        mu / log_sigma are read from the Sampling layer instance rather than
        from the Lambda inputs.
        """
        kl_loss = -0.5 * K.sum(
            1 + sampling_object.log_sigma
            - K.square(sampling_object.mu)
            - K.exp(sampling_object.log_sigma),
            axis=-1,
        )
        kld_weight = K.clip((step - anneal_start) / (anneal_end - anneal_start), 0, 1 - eps) + eps
        return kl_loss * kld_weight

    main_loss = Lambda(vae_cosine_distance_loss, output_shape=(1, ), name='main_loss')([input_word_embeddings, hidden])
    kld_loss = Lambda(vae_kld_loss, output_shape=(1, ), name='kld_loss')([input_word_embeddings])
    prediction = PredictionLayer(word_embedding_layer, sample_size, nclasses)(hidden)

    train_model = Model(inputs=[input_idx], outputs=[main_loss, kld_loss])
    test_model = Model(inputs=[input_idx], outputs=[prediction])
    return train_model, test_model
def vae_model(config_data, vocab, step):
    """Build the convolutional VAE with an LSTM decoder over word embeddings.

    Encoder: trainable pretrained embedding -> two stride-2 Conv1D stages ->
    Flatten -> Dense(intermediate_dim, relu) -> Dense(2 * z_size) ->
    ``Sampling``. Decoder: Dense -> Dense -> Reshape -> two stride-2
    Conv2DTranspose stages -> Reshape -> linear Dense layers ending in
    'auxiliary_output'. That auxiliary output is concatenated with the input
    embeddings shifted right by one position and fed to an LSTM followed by
    three linear Dense layers ('final_output').

    The training model outputs three loss tensors ('main_loss',
    'kld_loss', 'auxiliary_loss'); the test model outputs the result of
    ``LSTMStep`` applied to the auxiliary output.

    Args:
        config_data: mapping providing ``'z_size'``,
            ``'max_sentence_length'``, ``'intermediate_dim'`` and
            ``'vocab_path'`` (directory containing ``embedding_matrix.npy``).
        vocab: not used; the class count comes from the embedding matrix.
        step: value used in the KL annealing schedule -- presumably a backend
            variable holding the current training step; confirm with caller.

    Returns:
        Tuple ``(train_model, test_model)``.

    Raises:
        ValueError: if ``max_sentence_length`` is not a multiple of 4. The
            decoder rebuilds the sequence from a quarter-length tensor, so
            any other length cannot pass the Reshape layers.
    """
    z_size = config_data['z_size']
    sample_size = config_data['max_sentence_length']
    intermediate_dim = config_data['intermediate_dim']
    nfilter = 128
    out_size = 200
    eps = 0.001
    # KL weight ramps linearly from eps to 1 between these two step values.
    anneal_start = 200000.0
    anneal_end = anneal_start + 200000.0

    # Fail fast with a readable message instead of a size-mismatch error
    # raised from inside the decoder's Reshape layers.
    if sample_size % 4 != 0:
        raise ValueError(
            'max_sentence_length must be a multiple of 4 (got {}): the decoder '
            'upsamples a quarter-length tensor with two stride-2 transposed '
            'convolutions'.format(sample_size)
        )
    quarter_length = int(sample_size / 4)

    embedding_path = join(config_data['vocab_path'], 'embedding_matrix.npy')
    # Close the file deterministically instead of leaking the handle.
    with open(embedding_path, 'rb') as embedding_file:
        embedding_matrix = np.load(embedding_file)
    nclasses = embedding_matrix.shape[0]
    emb_dim = embedding_matrix.shape[1]

    # Single switch for regularization; None disables it everywhere.
    l2_regularizer = None
    weight_regularizers = dict(
        kernel_regularizer=l2_regularizer,
        bias_regularizer=l2_regularizer,
        activity_regularizer=l2_regularizer,
    )
    bn_regularizers = dict(
        beta_regularizer=l2_regularizer,
        gamma_regularizer=l2_regularizer,
    )

    # == == == == == =
    # Define Encoder
    # == == == == == =
    input_idx = Input(batch_shape=(None, sample_size), dtype='int32', name='word_input')
    # Despite the variable name this is the pretrained word-embedding layer
    # (layer name 'word_embeddings'); output is (batch_size, sample_size, emb_dim).
    one_hot_embeddings = Embedding(
        input_length=sample_size,
        input_dim=nclasses,
        output_dim=emb_dim,
        weights=[embedding_matrix],
        trainable=True,
        name='word_embeddings',
    )
    input_one_hot_embeddings = one_hot_embeddings(input_idx)

    # Each stride-2 convolution shortens the sequence axis by a factor of two.
    conv1 = Conv1D(
        filters=nfilter,
        kernel_size=3,
        strides=2,
        padding='same',
        **weight_regularizers
    )(input_one_hot_embeddings)
    bn1 = BatchNormalization(**bn_regularizers)(conv1)
    relu1 = Activation(activation='relu')(bn1)
    conv2 = Conv1D(
        filters=2 * nfilter,
        kernel_size=3,
        strides=2,
        padding='same',
        **weight_regularizers
    )(relu1)
    bn2 = BatchNormalization(**bn_regularizers)(conv2)
    relu2 = Activation(activation='relu')(bn2)
    flatten = Flatten()(relu2)

    hidden_intermediate_enc = Dense(
        intermediate_dim,
        activation='relu',
        name='intermediate_encoding',
        **weight_regularizers
    )(flatten)
    # 2 * z_size values -- presumably mean and log-sigma packed together for
    # Sampling to split; confirm against the Sampling implementation.
    hidden_zvalues = Dense(z_size * 2)(hidden_intermediate_enc)
    sampling_object = Sampling(z_size)
    sampling = sampling_object(hidden_zvalues)

    # == == == == == == == =
    # Define Decoder Layers
    # == == == == == == == =
    decoder_input_layer = Dense(
        intermediate_dim,
        name='intermediate_decoding',
        **weight_regularizers
    )
    hidden_intermediate_dec = decoder_input_layer(sampling)
    # Mirror of the encoder's flattened conv output: quarter length, 2 * nfilter channels.
    decoder_upsample = Dense(
        2 * nfilter * quarter_length,
        **weight_regularizers
    )(hidden_intermediate_dec)
    if K.image_data_format() == 'channels_first':
        # NOTE(review): in this layout the Reshape to (sample_size, out_size)
        # below reinterprets a channel-major tensor without a transpose --
        # confirm this path is intended/used.
        output_shape = (2 * nfilter, quarter_length, 1)
    else:
        output_shape = (quarter_length, 1, 2 * nfilter)
    reshape = Reshape(output_shape)(decoder_upsample)
    # 2D transposed convolutions with a width-1 second axis act as 1D deconvolutions.
    deconv1 = Conv2DTranspose(
        filters=nfilter,
        kernel_size=(3, 1),
        strides=(2, 1),
        padding='same',
        **weight_regularizers
    )(reshape)
    bn3 = BatchNormalization(**bn_regularizers)(deconv1)
    relu3 = Activation(activation='relu')(bn3)
    deconv2 = Conv2DTranspose(
        filters=out_size,
        kernel_size=(3, 1),
        strides=(2, 1),
        padding='same',
        **weight_regularizers
    )(relu3)
    bn4 = BatchNormalization(**bn_regularizers)(deconv2)
    relu4 = Activation(activation='relu')(bn4)
    reshape = Reshape((sample_size, out_size))(relu4)

    hidden = Dense(out_size, activation='linear')(reshape)
    hidden = Dense(out_size, activation='linear')(hidden)
    hidden_auxiliary = Dense(out_size, activation='linear', name='auxiliary_output')(hidden)

    def remove_last_column(x):
        """Drop the last position along axis 1."""
        return x[:, :-1, :]

    # Shift the input embeddings right by one step: pad one zero vector in
    # front, then drop the last position so the length stays sample_size.
    padding = ZeroPadding1D(padding=(1, 0))(input_one_hot_embeddings)
    # NOTE(review): the declared output_shape uses out_size, which is only
    # correct when emb_dim == out_size -- confirm.
    previous_char_slice = Lambda(remove_last_column, output_shape=(sample_size, out_size))(padding)
    combined_input = concatenate(inputs=[hidden_auxiliary, previous_char_slice], axis=2)

    # MUST BE IMPLEMENTATION 1 or 2 (presumably a requirement of LSTMStep,
    # which reuses this layer -- confirm).
    lstm = LSTM(
        200,
        return_sequences=True,
        implementation=2,
        recurrent_regularizer=l2_regularizer,
        **weight_regularizers
    )
    recurrent_component = lstm(combined_input)

    # Layer objects are kept separately because LSTMStep reuses them below.
    hidden_0 = Dense(out_size, activation='linear')
    hidden_1 = Dense(out_size, activation='linear')
    hidden_final = Dense(out_size, activation='linear', name='final_output')

    hidden_0_inst = hidden_0(recurrent_component)
    hidden_1_inst = hidden_1(hidden_0_inst)
    # Named "softmax" historically; the layer is a linear projection.
    softmax_final = hidden_final(hidden_1_inst)

    def vae_cosine_distance_loss(args):
        """Sum, per sentence, the cosine distance between target and output."""
        x_truth, x_decoded_final = args
        # Normalize over the embedding dimension.
        xt_mag = K.l2_normalize(x_truth, axis=2)
        xp_mag = K.l2_normalize(x_decoded_final, axis=2)
        elem_mult = xt_mag * xp_mag
        cosine_sim = K.sum(elem_mult, axis=2)
        cosine_distance = 1 - cosine_sim
        sum_over_sentences = K.sum(cosine_distance, axis=1)
        return sum_over_sentences

    def vae_kld_loss(args):
        """Return the annealed KL term; ``args`` is ignored.

        mu / log_sigma are read from the Sampling layer instance rather than
        from the Lambda inputs.
        """
        kl_loss = -0.5 * K.sum(
            1 + sampling_object.log_sigma
            - K.square(sampling_object.mu)
            - K.exp(sampling_object.log_sigma),
            axis=-1,
        )
        kld_weight = K.clip((step - anneal_start) / (anneal_end - anneal_start), 0, 1 - eps) + eps
        return kl_loss * kld_weight

    main_loss = Lambda(vae_cosine_distance_loss, output_shape=(1,), name='main_loss')(
        [input_one_hot_embeddings, softmax_final])
    kld_loss = Lambda(vae_kld_loss, output_shape=(1,), name='kld_loss')(
        [input_one_hot_embeddings, softmax_final, hidden_auxiliary])
    aux_loss = Lambda(vae_cosine_distance_loss, output_shape=(1,), name='auxiliary_loss')(
        [input_one_hot_embeddings, hidden_auxiliary])

    output_gen_layer = LSTMStep(
        lstm,
        one_hot_embeddings,
        [hidden_0, hidden_1, hidden_final],
        sample_size,
        nclasses,
    )(hidden_auxiliary)

    train_model = Model(inputs=[input_idx], outputs=[main_loss, kld_loss, aux_loss])
    test_model = Model(inputs=[input_idx], outputs=[output_gen_layer])
    return train_model, test_model