from keras.layers import Input, Dense, Activation, GaussianNoise, Lambda, Concatenate
from keras.models import Model
from keras.utils import plot_model
from keras import backend as K

# Project-specific helpers (ColWiseMultLayer, SliceLayer, MeanAct, DispAct,
# ZINB, sampling, sampling_kld, build_iaf_layer) are assumed to be importable
# from the repository's own modules.


def autoencoder(dims, noise_sd=2.5, init='glorot_uniform', act='relu'):
    """
    Fully connected, symmetric auto-encoder model.

    Arguments:
        dims: list of the number of units in each encoder layer.
            dims[0] is the input dimension; dims[-1] is the number of units
            in the hidden (bottleneck) layer. The decoder mirrors the
            encoder, so the auto-encoder has 2*len(dims)-1 layers in total.
        act: activation; not applied to the input, hidden, or output layers.

    Returns:
        Model of the autoencoder.
    """
    n_stacks = len(dims) - 1

    # inputs
    sf_layer = Input(shape=(1,), name='size_factors')
    x = Input(shape=(dims[0],), name='counts')
    h = x
    h = GaussianNoise(noise_sd, name='input_noise')(h)

    # internal layers in encoder
    for i in range(n_stacks - 1):
        h = Dense(dims[i + 1], kernel_initializer=init, name='encoder_%d' % i)(h)
        h = GaussianNoise(noise_sd, name='noise_%d' % i)(h)  # add Gaussian noise
        h = Activation(act)(h)

    # hidden (bottleneck) layer; features are extracted from here
    h = Dense(dims[-1], kernel_initializer=init, name='encoder_hidden')(h)

    # internal layers in decoder
    for i in range(n_stacks - 1, 0, -1):
        h = Dense(dims[i], activation=act, kernel_initializer=init,
                  name='decoder_%d' % i)(h)

    # ZINB output heads: dropout probability, dispersion, and mean
    pi = Dense(dims[0], activation='sigmoid', kernel_initializer=init, name='pi')(h)
    disp = Dense(dims[0], activation=DispAct, kernel_initializer=init, name='dispersion')(h)
    mean = Dense(dims[0], activation=MeanAct, kernel_initializer=init, name='mean')(h)

    # scale the mean by the per-cell size factors, then keep only the mean
    # tensor as the model output (disp and pi stay reachable for the loss)
    output = ColWiseMultLayer(name='output')([mean, sf_layer])
    output = SliceLayer(0, name='slice')([output, disp, pi])

    return Model(inputs=[x, sf_layer], outputs=output)
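# A minimal usage sketch for the model above. This helper is hypothetical
# (not part of the original code); dims, batch size, and epochs are assumed
# values, and ZINB is used with the same signature as in create_model below.
def _example_train_autoencoder(counts, size_factors, raw_counts):
    """Build the ZINB autoencoder and train it on a counts matrix."""
    ae = autoencoder(dims=[counts.shape[1], 256, 64, 32])
    # wire the pi/dispersion heads into the ZINB negative log-likelihood
    zinb = ZINB(ae.get_layer('pi').output, theta=ae.get_layer('dispersion').output)
    ae.compile(optimizer='adam', loss=zinb.loss)
    ae.fit([counts, size_factors], raw_counts, batch_size=256, epochs=300)
    return ae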
def create_model(model, dims, act='relu', init='glorot_uniform', ridge=0,
                 debug=False, **kwargs):
    """
    Build one of three network variants ("ae", "vae", or "iaf") sharing a
    ZINB output head; return the networks together with the matching loss.
    """
    assert model in ["ae", "vae", "iaf"]
    noise_sd = kwargs.get('noise_sd', 2.5)
    if model == "iaf":
        num_trans = kwargs.get('num_trans', 6)
    n_stacks = len(dims) - 1

    # input
    counts_input = Input(shape=(dims[0],), name='counts')
    h = counts_input
    h = GaussianNoise(noise_sd, name='input_noise')(h)

    # internal layers in encoder
    for i in range(n_stacks - 1):
        h = Dense(dims[i + 1], kernel_initializer=init, name='encoder_%d' % i)(h)
        if model == "ae" or model == "vae":
            h = GaussianNoise(noise_sd, name='noise_%d' % i)(h)  # add Gaussian noise
        h = Activation(act)(h)

    # latent layer
    if model == "ae":
        # hidden layer; features are extracted from here
        h = Dense(dims[-1], kernel_initializer=init, name='encoder_hidden')(h)
        latent_layer = h
    elif model == "vae":
        z_mean = Dense(dims[-1], name='z_mean')(h)
        z_log_var = Dense(dims[-1], name='z_log_var')(h)
        z = Lambda(sampling, output_shape=(dims[-1],),
                   name='z')([z_mean, z_log_var])
        h = z
        latent_layer = z_mean
    else:
        z, mu, sig, kl = build_iaf_layer(h, _name='IAF', num_trans=num_trans,
                                         latent_dim=dims[-1])
        z = z[-1]
        mu = mu[-1]
        h = z
        latent_layer = mu

    sf_layer = Input(shape=(1,), name='size_factors')

    # internal layers in decoder
    for i in range(n_stacks - 1, 0, -1):
        h = Dense(dims[i], activation=act, kernel_initializer=init,
                  name='decoder_%d' % i)(h)

    # ZINB output heads
    pi = Dense(dims[0], activation='sigmoid', kernel_initializer=init, name='pi')(h)
    disp = Dense(dims[0], activation=DispAct, kernel_initializer=init, name='dispersion')(h)
    mean = Dense(dims[0], activation=MeanAct, kernel_initializer=init, name='mean')(h)

    adjusted_mean = ColWiseMultLayer(name='output')([mean, sf_layer])
    outputs = SliceLayer(0, name='slice')([adjusted_mean, disp, pi])

    model_network = Model([counts_input, sf_layer], outputs, name=model + '_mlp')
    encoder_network = Model(counts_input, latent_layer, name='encoder')
    # give the imputation network its own name so it does not collide with
    # model_network above
    imputation_no_zi_network = Model([counts_input, sf_layer], adjusted_mean,
                                     name=model + '_mlp_imputation')

    # loss
    zinb = ZINB(pi, theta=disp, ridge_lambda=ridge, debug=debug)
    if model == "ae":
        def loss(y_true, y_pred):
            return zinb.loss(y_true=y_true, y_pred=y_pred)
    elif model == "vae":
        def loss(y_true, y_pred):
            reconstruction_loss = zinb.loss(y_true=y_true, y_pred=y_pred)
            reconstruction_loss *= dims[0]
            # analytic KL divergence between q(z|x) and the unit Gaussian prior
            kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
            kl_loss = K.sum(kl_loss, axis=-1)
            kl_loss *= -0.5
            return K.mean(reconstruction_loss + kl_loss)
    else:
        def loss(y_true, y_pred):
            reconstruction_loss = zinb.loss(y_true=y_true, y_pred=y_pred)
            reconstruction_loss *= dims[0]
            # kl comes from the IAF layer built above
            return K.mean(reconstruction_loss + kl)

    return (model_network, encoder_network, imputation_no_zi_network, loss,
            counts_input, latent_layer)
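# `sampling` is referenced by the Lambda layer in the "vae" branch above but
# is not defined in this section. The standard Keras implementation of the
# reparameterization trick, which the repository's helper presumably matches
# (an assumption, not the confirmed original), looks like this:
def sampling(args):
    """Draw z = z_mean + sigma * eps with eps ~ N(0, I), keeping the sampling
    step differentiable with respect to z_mean and z_log_var."""
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon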
# output
pi = Dense(dims[0], activation='sigmoid', kernel_initializer=init, name='pi')(h)
disp = Dense(dims[0], activation=DispAct, kernel_initializer=init, name='dispersion')(h)
mean = Dense(dims[0], activation=MeanAct, kernel_initializer=init, name='mean')(h)
outputs = ColWiseMultLayer(name='output')([mean, sf_layer])
outputs = SliceLayer(0, name='slice')([outputs, disp, pi])

# instantiate the full VAE model and plot its architecture
vae = Model([counts_input, sf_layer], outputs, name='vae_mlp')
plot_model(vae, to_file='vae_mlp.png', show_shapes=True)

if __name__ == "__main__":
    # set the hyper-parameters
    import argparse
def autoencoder(dims, n_clusters, noise_sd=0, init='glorot_uniform',
                act='relu', temp=500.0):
    """
    Fully connected, symmetric auto-encoder with a Gaussian latent layer of
    size n_clusters.

    Arguments:
        dims: list of the number of units in each encoder layer.
            dims[0] is the input dimension; dims[-1] is the width of the
            last encoder layer before the latent layer. The decoder mirrors
            the encoder.
        n_clusters: dimensionality of the latent layer (z_mean / z_log_var).
        noise_sd, temp: currently unused; retained for the disabled Gaussian
            noise and Gumbel-softmax temperature-annealing variants.
        act: activation; not applied to the input, latent, or output layers.

    Returns:
        Model with two outputs: the size-factor-adjusted ZINB mean and the
        concatenated [z_mean, z_log_var] tensor.
    """
    n_stacks = len(dims) - 1

    # inputs
    sf_layer = Input(shape=(1,), name='size_factors')
    x = Input(shape=(dims[0],), name='counts')
    h = x

    # internal layers in encoder
    for i in range(n_stacks):
        h = Dense(dims[i + 1], kernel_initializer=init, name='encoder_%d' % i)(h)
        h = Activation(act)(h)

    # Gaussian latent layer
    z_mean = Dense(n_clusters, name='z_mean')(h)
    z_log_var = Dense(n_clusters, name='z_log_var')(h)
    z_output = Concatenate(axis=-1, name='z_output')([z_mean, z_log_var])

    # use the reparameterization trick to push the sampling out as an input;
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling_kld, output_shape=(n_clusters,),
               name='z')([z_mean, z_log_var])
    h = z

    # internal layers in decoder
    for i in range(n_stacks, 0, -1):
        h = Dense(dims[i], activation=act, kernel_initializer=init,
                  name='decoder_%d' % i)(h)

    # ZINB output heads
    pi = Dense(dims[0], activation='sigmoid', kernel_initializer=init, name='pi')(h)
    disp = Dense(dims[0], activation=DispAct, kernel_initializer=init, name='dispersion')(h)
    mean = Dense(dims[0], activation=MeanAct, kernel_initializer=init, name='mean')(h)

    output = ColWiseMultLayer(name='output')([mean, sf_layer])
    output = SliceLayer(0, name='slice')([output, disp, pi])

    return Model(inputs=[x, sf_layer], outputs=[output, z_output])
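# A minimal usage sketch for the two-output model above. This helper is
# hypothetical (not part of the original code); dims, the data arguments,
# and n_clusters are assumed values for illustration.
def _example_latent_stats(counts, size_factors, n_clusters=10):
    """Build the clustering autoencoder and split its second output back
    into z_mean and z_log_var."""
    ae = autoencoder(dims=[counts.shape[1], 256, 64], n_clusters=n_clusters)
    # predict returns one array per model output, in declaration order
    recon, z_stats = ae.predict([counts, size_factors])
    z_mean, z_log_var = z_stats[:, :n_clusters], z_stats[:, n_clusters:]
    return recon, z_mean, z_log_var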