def predict(self, adata, encoder_labels, decoder_labels):
    """
        Reconstructs `adata` through the network under the given condition labels.

        The expression matrix is first evaluated at `self.z_mean` using
        `encoder_labels`; that result is then fed back in as `self.z_mean`
        together with `decoder_labels` to evaluate `self.x_hat`.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix. It is passed through `remove_sparsity`
                before use (presumably densifying `adata.X` -- confirm against
                the helper).
            encoder_labels: numpy nd-array
                values fed to the `self.encoder_labels` placeholder.
            decoder_labels: numpy nd-array
                values fed to the `self.decoder_labels` placeholder.

        # Returns
            reconstructed_adata: `~anndata.AnnData`
                `X` holds the evaluated `self.x_hat`; `obs` is a deep copy of
                the input's `obs` and `var_names` are taken from the input.
    """
    dense = remove_sparsity(adata)

    # Encoding pass: obtain the latent mean for every observation.
    encode_feed = {
        self.x: dense.X,
        self.encoder_labels: encoder_labels,
        self.size: dense.shape[0],
        self.is_training: False,
    }
    z = self.sess.run(self.z_mean, feed_dict=encode_feed)

    # Decoding pass: the latent mean is injected directly via feed_dict.
    decode_feed = {
        self.z_mean: z,
        self.decoder_labels: decoder_labels,
        self.is_training: False,
    }
    x_hat = self.sess.run(self.x_hat, feed_dict=decode_feed)

    result = anndata.AnnData(X=x_hat)
    result.obs = dense.obs.copy(deep=True)
    result.var_names = dense.var_names
    return result
def to_mmd_layer(self, adata, encoder_labels, decoder_labels):
    """
        Maps `adata` to the output of the `'mmd'` auxiliary model.

        The data is encoded with `self.encoder_model` (its third output is
        used as the latent code) and the latent code is then passed, together
        with the decoder labels, to `self.aux_models['mmd']`.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.
            encoder_labels: numpy nd-array
                condition labels for the encoder; one-hot encoded here with
                `self.n_conditions` classes.
            decoder_labels: numpy nd-array
                condition labels for the MMD model; one-hot encoded here with
                `self.n_conditions` classes.

        # Returns
            mmd_adata: `~anndata.AnnData`
                `X` holds the `'mmd'` model output; `obs` is a deep copy of
                the input's `obs`.
    """
    dense = remove_sparsity(adata)

    n_classes = self.n_conditions
    enc_onehot = to_categorical(encoder_labels, num_classes=n_classes)
    dec_onehot = to_categorical(decoder_labels, num_classes=n_classes)

    encoder_outputs = self.encoder_model.predict([dense.X, enc_onehot])
    z = encoder_outputs[2]
    mmd_out = self.aux_models['mmd'].predict([z, dec_onehot])

    result = anndata.AnnData(X=mmd_out)
    result.obs = dense.obs.copy(deep=True)
    return result
def to_latent(self, adata, encoder_labels, return_adata=True):
    """
        Maps `adata` into the latent space of the encoder.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.
            encoder_labels: numpy nd-array
                condition labels; one-hot encoded here with
                `self.n_conditions` classes.
            return_adata: bool
                if `True` the latent codes are wrapped in an `AnnData` whose
                `obs` is a deep copy of the input's `obs`; otherwise the raw
                array is returned.

        # Returns
            output: `~anndata.AnnData` or numpy nd-array
                the third output of `self.encoder_model.predict`, with NaNs
                replaced via `np.nan_to_num`.
    """
    dense = remove_sparsity(adata)
    onehot = to_categorical(encoder_labels, num_classes=self.n_conditions)

    encoder_outputs = self.encoder_model.predict([dense.X, onehot])
    z = np.nan_to_num(encoder_outputs[2])

    if not return_adata:
        return z

    wrapped = anndata.AnnData(X=z)
    wrapped.obs = dense.obs.copy(deep=True)
    return wrapped
def to_mmd_layer(self, adata, encoder_labels, feed_fake=0):
    """
        Maps `adata` to the second output of `self.cvae_model`.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` and
                reshaped to `(-1, *self.x_dim)` before prediction.
            encoder_labels: numpy nd-array
                condition labels for the encoder; one-hot encoded here with
                `self.n_conditions` classes.
            feed_fake: int
                when greater than -1, every decoder label is set to this value
                (an array shaped like `encoder_labels` filled with
                `feed_fake`); otherwise the encoder labels are reused as
                decoder labels.

        # Returns
            mmd_adata: `~anndata.AnnData`
                `X` holds the second output of `self.cvae_model.predict`;
                `obs` is a deep copy of the input's `obs`.
    """
    # Decoder labels are derived from the raw (not yet one-hot) encoder labels.
    raw_decoder_labels = (
        np.zeros(shape=encoder_labels.shape) + feed_fake
        if feed_fake > -1
        else encoder_labels
    )

    dense = remove_sparsity(adata)
    image_batch = np.reshape(dense.X, (-1, *self.x_dim))

    n_classes = self.n_conditions
    enc_onehot = to_categorical(encoder_labels, num_classes=n_classes)
    dec_onehot = to_categorical(raw_decoder_labels, num_classes=n_classes)

    model_outputs = self.cvae_model.predict([image_batch, enc_onehot, dec_onehot])
    mmd_out = model_outputs[1]

    result = anndata.AnnData(X=mmd_out)
    result.obs = dense.obs.copy(deep=True)
    return result
def to_mmd_layer(self, adata):
    """
        Maps `adata` to the output of the `'mmd'` auxiliary model.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.

        # Returns
            mmd_adata: `~anndata.AnnData`
                `X` holds `self.aux_models['mmd'].predict(adata.X)`; `obs` is
                a deep copy of the input's `obs`.
    """
    dense = remove_sparsity(adata)
    mmd_model = self.aux_models['mmd']
    result = anndata.AnnData(X=mmd_model.predict(dense.X))
    result.obs = dense.obs.copy(deep=True)
    return result
def to_latent(self, adata):
    """
        Maps `adata` to the output of the `'gen_AB_latent'` auxiliary model.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.

        # Returns
            latent_adata: `~anndata.AnnData`
                `X` holds `self.aux_models['gen_AB_latent'].predict(adata.X)`;
                `obs` is a deep copy of the input's `obs`.
    """
    adata = remove_sparsity(adata)
    latent = self.aux_models['gen_AB_latent'].predict(adata.X)
    latent_adata = anndata.AnnData(X=latent)
    # `obs` is a DataFrame attribute, not a callable: the previous
    # `adata.obs(deep=True)` raised TypeError. Copy it explicitly instead.
    latent_adata.obs = adata.obs.copy(deep=True)
    return latent_adata
def train(self, train_adata, condition_key, le=None, n_epochs=1000, batch_size=256):
    """
        Trains the backend model on `train_adata`.

        # Parameters
            train_adata: `~anndata.AnnData`
                Annotated training data; passed through `remove_sparsity`.
            condition_key: str
                forwarded to `label_encoder` together with `train_adata`
                and `le` to obtain the training labels.
            le: optional
                existing encoder forwarded to `label_encoder`.
            n_epochs: int
                forwarded to `self.model_backend.train`.
            batch_size: int
                forwarded to `self.model_backend.train`.

        # Returns
            Nothing will be returned.
    """
    dense = remove_sparsity(train_adata)

    # Only the encoded labels are needed; the returned encoder is discarded.
    encoded, _ = label_encoder(dense, le, condition_key)
    flat_labels = np.reshape(encoded, (-1,))

    loader = Loader(dense.X, labels=flat_labels, shuffle=True)
    self.model_backend.train(loader, n_epochs, batch_size)
def to_latent(self, adata, labels):
    """
        Maps `adata` to the embedding produced by the backend model.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.
            labels: numpy nd-array
                labels handed to the `Loader` alongside `adata.X`.

        # Returns
            latent_adata: `~anndata.AnnData`
                `X` holds the first element returned by
                `self.model_backend.get_embedding`, with NaNs replaced via
                `np.nan_to_num`; `obs` is a deep copy of the input's `obs`.
    """
    dense = remove_sparsity(adata)

    # Order must be preserved so rows line up with `obs`.
    loader = Loader(data=dense.X, labels=labels, shuffle=False)
    embedding = self.model_backend.get_embedding(loader)[0]
    embedding = np.nan_to_num(embedding)

    result = anndata.AnnData(X=embedding)
    result.obs = dense.obs.copy(deep=True)
    return result
def to_latent(self, adata):
    """
        Encodes `adata` with `self.encoder_model`.

        # Parameters
            adata: `~anndata.AnnData` or other model input
                when an `AnnData` is given it is passed through
                `remove_sparsity` and its `X` is encoded; anything else is
                handed to `self.encoder_model.predict` unchanged.

        # Returns
            `~anndata.AnnData` (with `obs` deep-copied from the input) when
            the input was an `AnnData`; otherwise the raw prediction.
    """
    # Non-AnnData inputs are encoded directly and returned as-is.
    if not isinstance(adata, anndata.AnnData):
        return self.encoder_model.predict(adata)

    dense = remove_sparsity(adata)
    result = anndata.AnnData(X=self.encoder_model.predict(dense.X))
    result.obs = dense.obs.copy(deep=True)
    return result
def predict(self, adata, cell_type_to_predict, source_condition, condition_key, cell_type_key):
    """
        Runs `self.g_AB` on the source-condition cells of one cell type.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.
            cell_type_to_predict: str
                value of `adata.obs[cell_type_key]` selecting the cell type.
            source_condition: str
                value of `adata.obs[condition_key]` selecting the cells that
                are fed to the generator.
            condition_key: str
                column of `adata.obs` holding the condition.
            cell_type_key: str
                column of `adata.obs` holding the cell type.

        # Returns
            reconstructed_adata: `~anndata.AnnData`
                `X` holds `self.g_AB.predict(source_adata.X)`; `obs` is a deep
                copy of the selected source cells' `obs` and `var_names` are
                those of the selected source cells.
    """
    adata = remove_sparsity(adata)
    cell_type_adata = adata[adata.obs[cell_type_key] == cell_type_to_predict]
    source_adata = cell_type_adata[cell_type_adata.obs[condition_key] == source_condition]

    reconstructed = self.g_AB.predict(source_adata.X)
    reconstructed_adata = anndata.AnnData(X=reconstructed)

    # Two fixes: `obs` is a DataFrame (not callable), so it must be copied with
    # `.copy(deep=True)`; and the prediction has one row per *source* cell, so
    # the annotations must come from `source_adata`, not from the larger
    # `cell_type_adata` whose row count would not match.
    reconstructed_adata.obs = source_adata.obs.copy(deep=True)
    reconstructed_adata.var_names = source_adata.var_names
    return reconstructed_adata
def predict(self, adata, encoder_labels, decoder_labels, return_adata=True):
    """
        Reconstructs `adata` with `self.cvae_model` under the given labels.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.
            encoder_labels: numpy nd-array
                condition labels for the encoder; one-hot encoded here with
                `self.n_conditions` classes.
            decoder_labels: numpy nd-array
                condition labels for the decoder; one-hot encoded here with
                `self.n_conditions` classes.
            return_adata: bool
                if `True` the reconstruction is wrapped in an `AnnData` with
                `obs` deep-copied and `var_names` taken from the input;
                otherwise the raw array is returned.

        # Returns
            output: `~anndata.AnnData` or numpy nd-array
                the first output of `self.cvae_model.predict`, with NaNs
                replaced via `np.nan_to_num`.
    """
    dense = remove_sparsity(adata)

    n_classes = self.n_conditions
    enc_onehot = to_categorical(encoder_labels, num_classes=n_classes)
    dec_onehot = to_categorical(decoder_labels, num_classes=n_classes)

    model_outputs = self.cvae_model.predict([dense.X, enc_onehot, dec_onehot])
    x_hat = np.nan_to_num(model_outputs[0])

    if not return_adata:
        return x_hat

    wrapped = anndata.AnnData(X=x_hat)
    wrapped.obs = dense.obs.copy(deep=True)
    wrapped.var_names = dense.var_names
    return wrapped
def predict(self, adata, target_label, condition_key, cell_type_key, cell_type_to_predict, source_condition,
            target_condition):
    """
        Reconstructs the source-condition cells of one cell type under
        `target_label` using the backend model.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.
            target_label: numeric
                label assigned to every selected cell when building the
                `Loader`.
            condition_key: str
                column of `adata.obs` holding the condition; also the name of
                the `obs` column written on the result.
            cell_type_key: str
                column of `adata.obs` holding the cell type.
            cell_type_to_predict: str
                cell type to select.
            source_condition: str
                condition whose cells are fed to the model.
            target_condition: str
                only used to build the `obs` annotation of the result.

        # Returns
            pred_adata: `~anndata.AnnData`
                `X` holds the first element returned by
                `self.model_backend.get_reconstruction` (NaNs replaced via
                `np.nan_to_num`); `obs[condition_key]` is set to
                "<cell_type_to_predict>_pred_<target_condition>" and
                `var_names` are taken from the input.
    """
    dense = remove_sparsity(adata)

    is_cell_type = dense.obs[cell_type_key] == cell_type_to_predict
    of_cell_type = dense[is_cell_type]
    is_source = of_cell_type.obs[condition_key] == source_condition
    source_cells = of_cell_type[is_source]

    # Every source cell receives the same target label.
    labels = np.zeros(source_cells.shape[0]) + target_label
    loader = Loader(source_cells.X, labels=labels, shuffle=False)

    reconstruction = self.model_backend.get_reconstruction(loader)
    prediction = np.nan_to_num(reconstruction[0])

    result = anndata.AnnData(X=prediction)
    result.obs[condition_key] = f"{cell_type_to_predict}_pred_{target_condition}"
    result.var_names = dense.var_names
    return result
def train(self, train_data, validation_data=None, n_epochs=25, batch_size=32, early_stop_limit=20,
          threshold=0.0025, initial_run=True, shuffle=True, verbose=1, save=True, checkpoint=50, **kwargs):
    """
        Fits `self.vae_model` to reconstruct `train_data.X`.

        # Parameters
            train_data: `~anndata.AnnData`
                training data; optionally shuffled with `shuffle_data`, then
                passed through `remove_sparsity`.
            validation_data: `~anndata.AnnData` or None
                if given it is passed through `remove_sparsity` and used as
                validation set; otherwise 20% of the training data is held
                out via `validation_split=0.2`.
            n_epochs: int
                number of epochs passed to `fit`.
            batch_size: int
                batch size passed to `fit`.
            early_stop_limit: int
                `patience` of the `EarlyStopping` callback (monitors
                `val_loss`).
            threshold: float
                `min_delta` of the `EarlyStopping` callback.
            initial_run: bool
                if `True` a "----Training----" message is logged.
            shuffle: bool
                if `True` the training data is shuffled up front; the flag is
                also forwarded to `fit`.
            verbose: int
                verbosity passed to `fit`.
            save: bool
                when exactly `True`, `self.save_model()` is called after
                fitting.
            checkpoint, **kwargs:
                accepted for interface compatibility; not used in this
                function.

        # Returns
            result:
                the value returned by `self.vae_model.fit`.
    """
    if initial_run:
        log.info("----Training----")
    if shuffle:
        train_data = shuffle_data(train_data)
    train_data = remove_sparsity(train_data)

    callbacks = [
        EarlyStopping(patience=early_stop_limit, monitor='val_loss', min_delta=threshold),
        CSVLogger(filename="./csv_logger.log"),
    ]

    fit_kwargs = dict(x=train_data.X,
                      y=train_data.X,
                      epochs=n_epochs,
                      batch_size=batch_size,
                      shuffle=shuffle,
                      callbacks=callbacks,
                      verbose=verbose)

    if validation_data is not None:
        # Validation data gets the same treatment as the training data;
        # previously its `X` was handed to `fit` without `remove_sparsity`.
        validation_data = remove_sparsity(validation_data)
        fit_kwargs['validation_data'] = (validation_data.X, validation_data.X)
    else:
        fit_kwargs['validation_split'] = 0.2

    result = self.vae_model.fit(**fit_kwargs)

    if save is True:
        self.save_model()
    return result
def to_mmd_layer(self, adata, encoder_labels, decoder_labels):
    """
        Evaluates the `self.mmd_hl` tensor for every observation in `adata`.

        # Parameters
            adata: `~anndata.AnnData`
                Annotated data matrix; passed through `remove_sparsity` first.
            encoder_labels: numpy nd-array
                values fed to the `self.encoder_labels` placeholder.
            decoder_labels: numpy nd-array
                values fed to the `self.decoder_labels` placeholder.

        # Returns
            mmd_adata: `~anndata.AnnData`
                `X` holds the evaluated `self.mmd_hl`; `obs` is a deep copy
                of the input's `obs`.
    """
    dense = remove_sparsity(adata)

    feed = {
        self.x: dense.X,
        self.encoder_labels: encoder_labels,
        self.decoder_labels: decoder_labels,
        self.size: dense.shape[0],
        self.is_training: False,
    }
    mmd_out = self.sess.run(self.mmd_hl, feed_dict=feed)

    result = anndata.AnnData(X=mmd_out)
    result.obs = dense.obs.copy(deep=True)
    return result
input_shape = (64, 64, 3) elif data_name == "mnist": conditions = ["normal", "thin", "thick"] target_conditions = ["thin", 'thick'] source_condition = "normal" labelencoder = {"normal": 0, "thin": 1, "thick": 2} label_key = "labels" condition_key = "condition" specific_labels = [1, 3, 6, 7] arch_style = 1 adata = sc.read("./data/thick_thin_mnist/thick_thin_mnist.h5ad") input_shape = (28, 28, 1) else: raise Exception("Invalid data name") adata = remove_sparsity(adata) # Preprocessing adata.X /= 255.0 train_adata, valid_adata = reptrvae.utils.train_test_split(adata, 0.80) net_train_adata = train_adata[ ~((train_adata.obs[label_key].isin(specific_labels)) & (train_adata.obs[condition_key].isin(target_conditions)))] net_valid_adata = valid_adata[ ~((valid_adata.obs[label_key].isin(specific_labels)) & (valid_adata.obs[condition_key].isin(target_conditions)))] network = reptrvae.models.DCtrVAE(x_dimension=input_shape, z_dimension=60, n_conditions=len(net_train_adata.obs[condition_key].unique()), alpha=5e-5,
def train(self, train_adata, valid_adata=None, condition_encoder=None, condition_key='condition', n_epochs=25,
          batch_size=32, early_stop_limit=20, lr_reducer=10, threshold=0.0025, monitor='val_loss', shuffle=True,
          verbose=2, save=True):
    """
        Fits `self.cvae_model` on `train_adata`.

        The model receives `[images, one-hot labels, one-hot labels]` as
        inputs and `[images, encoded labels]` as targets, where the images
        are `adata.X` reshaped to `(-1, *self.x_dim)`.

        # Parameters
            train_adata: `~anndata.AnnData`
                training data; passed through `remove_sparsity`.
            valid_adata: `~anndata.AnnData` or None
                validation data, prepared the same way as the training data.
                When `None`, `validation_split=0.2` is used instead.
            condition_encoder: optional
                forwarded to `label_encoder`; the encoder returned for the
                training data is stored on `self.condition_encoder`.
            condition_key: str
                forwarded to `label_encoder`.
            n_epochs: int
                number of epochs passed to `fit`.
            batch_size: int
                batch size passed to `fit`.
            early_stop_limit: int
                if greater than 0, an `EarlyStopping` callback with this
                `patience` is added.
            lr_reducer: int
                if greater than 0, a `ReduceLROnPlateau` callback with this
                `patience` is added.
            threshold: float
                `min_delta` of the `EarlyStopping` callback.
            monitor: str
                quantity monitored by both optional callbacks.
            shuffle: bool
                forwarded to `fit`.
            verbose: int
                if greater than 2, progress is reported through
                `print_message` at the end of each epoch and `fit` itself
                runs with `verbose=0`; otherwise forwarded to `fit`.
            save: bool
                if truthy, `self.save_model()` is called after fitting.

        # Returns
            Nothing will be returned
    """
    train_adata = remove_sparsity(train_adata)

    train_encoded, self.condition_encoder = label_encoder(train_adata, condition_encoder, condition_key)
    train_onehot = to_categorical(train_encoded, num_classes=self.n_conditions)

    callbacks = [
        History(),
        CSVLogger(filename="./csv_logger.log"),
    ]

    if early_stop_limit > 0:
        callbacks.append(EarlyStopping(patience=early_stop_limit, monitor=monitor, min_delta=threshold))

    if lr_reducer > 0:
        callbacks.append(ReduceLROnPlateau(monitor=monitor, patience=lr_reducer, verbose=verbose))

    fit_verbose = verbose
    if verbose > 2:
        # Custom per-epoch reporting replaces Keras' own progress output.
        callbacks.append(
            LambdaCallback(on_epoch_end=lambda epoch, logs: print_message(epoch, logs, n_epochs, verbose)))
        fit_verbose = 0

    train_images = np.reshape(train_adata.X, (-1, *self.x_dim))

    fit_kwargs = dict(x=[train_images, train_onehot, train_onehot],
                      y=[train_images, train_encoded],
                      epochs=n_epochs,
                      batch_size=batch_size,
                      shuffle=shuffle,
                      callbacks=callbacks,
                      verbose=fit_verbose)

    if valid_adata is None:
        fit_kwargs['validation_split'] = 0.2
    else:
        valid_adata = remove_sparsity(valid_adata)

        valid_encoded, _ = label_encoder(valid_adata, condition_encoder, condition_key)
        valid_onehot = to_categorical(valid_encoded, num_classes=self.n_conditions)
        valid_images = np.reshape(valid_adata.X, (-1, *self.x_dim))

        x_valid = [valid_images, valid_onehot, valid_onehot]
        y_valid = [valid_images, valid_encoded]
        fit_kwargs['validation_data'] = (x_valid, y_valid)

    self.cvae_model.fit(**fit_kwargs)

    if save:
        self.save_model()