示例#1
0
    def predict(self, adata, encoder_labels, decoder_labels):
        """
            Encode `adata` to the latent mean and decode it again, using
            separate condition labels for the encoding and decoding passes.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix. It is passed through `remove_sparsity`
                    and `adata.X` is then fed to the network input `self.x`.
                encoder_labels: numpy nd-array
                    labels fed to `self.encoder_labels` during the encoding pass.
                decoder_labels: numpy nd-array
                    labels fed to `self.decoder_labels` during the decoding pass.
            # Returns
                reconstructed_adata: `~anndata.AnnData`
                    the decoded matrix as `X`, carrying a deep copy of `adata.obs`
                    and the `var_names` of `adata`.
        """
        adata = remove_sparsity(adata)

        # Pass 1: evaluate the latent mean for every observation.
        encode_feed = {
            self.x: adata.X,
            self.encoder_labels: encoder_labels,
            self.size: adata.shape[0],
            self.is_training: False,
        }
        z = self.sess.run(self.z_mean, feed_dict=encode_feed)

        # Pass 2: feed the computed codes in place of `z_mean` and evaluate
        # the reconstruction with the decoder-side labels.
        decode_feed = {
            self.z_mean: z,
            self.decoder_labels: decoder_labels,
            self.is_training: False,
        }
        x_hat = self.sess.run(self.x_hat, feed_dict=decode_feed)

        result = anndata.AnnData(X=x_hat)
        result.obs = adata.obs.copy(deep=True)
        result.var_names = adata.var_names
        return result
    def to_mmd_layer(self, adata, encoder_labels, decoder_labels):
        """
            Map `adata` to the output of the auxiliary 'mmd' model. The data is
            first encoded with `self.encoder_model`; the third encoder output is
            then passed, together with the one-hot decoder labels, to
            `self.aux_models['mmd']`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix to be mapped. `adata.X` has to be in shape [n_obs, n_vars].
                encoder_labels: numpy nd-array
                    labels that are one-hot encoded and fed to the encoder model.
                decoder_labels: numpy nd-array
                    labels that are one-hot encoded and fed to the 'mmd' model.
            # Returns
                mmd_adata: `~anndata.AnnData`
                    the 'mmd' model output as `X`, with a deep copy of `adata.obs`.
        """
        adata = remove_sparsity(adata)

        # Both label vectors are expanded to `self.n_conditions` one-hot columns.
        n_classes = self.n_conditions
        enc_onehot = to_categorical(encoder_labels, num_classes=n_classes)
        dec_onehot = to_categorical(decoder_labels, num_classes=n_classes)

        # The encoder returns several outputs; index 2 is the one fed onwards.
        encoder_outputs = self.encoder_model.predict([adata.X, enc_onehot])
        z = encoder_outputs[2]

        mmd_model = self.aux_models['mmd']
        result = anndata.AnnData(X=mmd_model.predict([z, dec_onehot]))
        result.obs = adata.obs.copy(deep=True)
        return result
    def to_latent(self, adata, encoder_labels, return_adata=True):
        """
            Map `adata` in to the latent space by running it through
            `self.encoder_model` and taking the third encoder output.
            NaNs in the result are replaced via `np.nan_to_num`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix to be mapped to latent space. `adata.X` has to be in shape [n_obs, n_vars].
                encoder_labels: numpy nd-array
                    labels that are one-hot encoded and fed as the encoder's condition input.
                return_adata: bool
                    if `True`: the encoding is wrapped in an `AnnData` carrying a deep copy of `adata.obs`.
                    if `False`: the raw encoding array is returned.
            # Returns
                output: `~anndata.AnnData` or numpy nd-array
                    latent space encoding of `adata`
        """
        adata = remove_sparsity(adata)

        onehot = to_categorical(encoder_labels, num_classes=self.n_conditions)
        encoder_outputs = self.encoder_model.predict([adata.X, onehot])
        z = np.nan_to_num(encoder_outputs[2])

        # Raw-array callers are served first; everything else gets an AnnData.
        if not return_adata:
            return z

        latent_adata = anndata.AnnData(X=z)
        latent_adata.obs = adata.obs.copy(deep=True)
        return latent_adata
示例#4
0
    def to_mmd_layer(self, adata, encoder_labels, feed_fake=0):
        """
            Map `adata` to the second output of `self.cvae_model`. `adata.X` is
            reshaped to `(-1, *self.x_dim)` before being fed to the model.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix to be mapped. `adata.X` must be reshapeable to `(-1, *self.x_dim)`.
                encoder_labels: numpy nd-array
                    labels that are one-hot encoded and fed as the encoder condition.
                feed_fake: int
                    if greater than -1: every decoder label is set to this value.
                    otherwise: the decoder labels are the same as `encoder_labels`.
            # Returns
                mmd_adata: `~anndata.AnnData`
                    the second model output as `X`, with a deep copy of `adata.obs`.
        """
        # Decide which labels the decoder side sees.
        use_fake = feed_fake > -1
        decoder_labels = (np.zeros(shape=encoder_labels.shape) + feed_fake
                          if use_fake else encoder_labels)

        adata = remove_sparsity(adata)
        image_shape = (-1, *self.x_dim)
        images = np.reshape(adata.X, image_shape)

        n_classes = self.n_conditions
        enc_onehot = to_categorical(encoder_labels, num_classes=n_classes)
        dec_onehot = to_categorical(decoder_labels, num_classes=n_classes)

        # Index 1 of the model outputs is the layer exposed by this method.
        outputs = self.cvae_model.predict([images, enc_onehot, dec_onehot])
        result = anndata.AnnData(X=outputs[1])
        result.obs = adata.obs.copy(deep=True)
        return result
示例#5
0
    def to_mmd_layer(self, adata):
        """
            Run `adata.X` through `self.aux_models['mmd']`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix; passed through `remove_sparsity` first.
            # Returns
                mmd_adata: `~anndata.AnnData`
                    the model output as `X`, with a deep copy of `adata.obs`.
        """
        dense = remove_sparsity(adata)
        mmd_model = self.aux_models['mmd']

        result = anndata.AnnData(X=mmd_model.predict(dense.X))
        result.obs = dense.obs.copy(deep=True)
        return result
示例#6
0
    def to_latent(self, adata):
        """
            Run `adata.X` through `self.aux_models['gen_AB_latent']`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix; passed through `remove_sparsity` first.
            # Returns
                latent_adata: `~anndata.AnnData`
                    the model output as `X`, with a deep copy of `adata.obs`.
        """
        adata = remove_sparsity(adata)

        latent = self.aux_models['gen_AB_latent'].predict(adata.X)
        latent_adata = anndata.AnnData(X=latent)
        # `obs` is a DataFrame, not a callable: copy it explicitly so the
        # result does not share annotations with the input.
        latent_adata.obs = adata.obs.copy(deep=True)

        return latent_adata
示例#7
0
    def train(self, train_adata, condition_key, le=None, n_epochs=1000, batch_size=256):
        """
            Train `self.model_backend` on `train_adata`.
            # Parameters
                train_adata: `~anndata.AnnData`
                    Annotated training data; passed through `remove_sparsity` first.
                condition_key: str
                    forwarded to `label_encoder` together with `le` to obtain the labels.
                le: optional
                    encoder argument forwarded to `label_encoder`.
                n_epochs: int
                    forwarded to `self.model_backend.train`.
                batch_size: int
                    forwarded to `self.model_backend.train`.
            # Returns
                Nothing will be returned
        """
        train_adata = remove_sparsity(train_adata)
        features = train_adata.X

        # Only the encoded labels are needed here; they are flattened to 1-D.
        encoded, _ = label_encoder(train_adata, le, condition_key)
        flat_labels = np.reshape(encoded, (-1,))

        loader = Loader(features, labels=flat_labels, shuffle=True)
        self.model_backend.train(loader, n_epochs, batch_size)
示例#8
0
    def to_latent(self, adata, labels):
        """
            Embed `adata` with `self.model_backend.get_embedding`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix; passed through `remove_sparsity` first.
                labels:
                    labels handed to the `Loader` alongside `adata.X`.
            # Returns
                latent_adata: `~anndata.AnnData`
                    first element of the backend's embedding result (NaNs replaced
                    via `np.nan_to_num`) as `X`, with a deep copy of `adata.obs`.
        """
        adata = remove_sparsity(adata)

        # Order must be preserved so rows line up with `adata.obs`.
        loader = Loader(data=adata.X, labels=labels, shuffle=False)
        embedding = self.model_backend.get_embedding(loader)
        z = np.nan_to_num(embedding[0])

        result = anndata.AnnData(X=z)
        result.obs = adata.obs.copy(deep=True)
        return result
示例#9
0
    def to_latent(self, adata):
        """
            Encode `adata` with `self.encoder_model`.
            # Parameters
                adata: `~anndata.AnnData` or any input accepted by `self.encoder_model.predict`
                    data to be encoded.
            # Returns
                `~anndata.AnnData` with the encoding as `X` and a deep copy of
                `adata.obs` when an `AnnData` was given; otherwise the raw
                output of `self.encoder_model.predict`.
        """
        # Anything that is not an AnnData goes straight to the encoder.
        if not isinstance(adata, anndata.AnnData):
            return self.encoder_model.predict(adata)

        adata = remove_sparsity(adata)
        encoded = self.encoder_model.predict(adata.X)

        result = anndata.AnnData(X=encoded)
        result.obs = adata.obs.copy(deep=True)
        return result
示例#10
0
    def predict(self, adata, cell_type_to_predict, source_condition, condition_key, cell_type_key):
        """
            Run the source-condition cells of one cell type through `self.g_AB`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix; passed through `remove_sparsity` first.
                cell_type_to_predict: str
                    value of `adata.obs[cell_type_key]` selecting the cell type.
                source_condition: str
                    value of `adata.obs[condition_key]` selecting the cells fed to the model.
                condition_key: str
                    name of the condition column in `adata.obs`.
                cell_type_key: str
                    name of the cell type column in `adata.obs`.
            # Returns
                reconstructed_adata: `~anndata.AnnData`
                    the model output as `X`, with a deep copy of the selected
                    source cells' `obs` and their `var_names`.
        """
        adata = remove_sparsity(adata)

        cell_type_adata = adata[adata.obs[cell_type_key] == cell_type_to_predict]
        source_adata = cell_type_adata[cell_type_adata.obs[condition_key] == source_condition]

        reconstructed = self.g_AB.predict(source_adata.X)
        reconstructed_adata = anndata.AnnData(X=reconstructed)
        # The predictions have one row per *source* cell, so the annotations
        # must come from `source_adata` (not the larger `cell_type_adata`),
        # and `obs` is a DataFrame that has to be copied, not called.
        reconstructed_adata.obs = source_adata.obs.copy(deep=True)
        reconstructed_adata.var_names = source_adata.var_names

        return reconstructed_adata
    def predict(self,
                adata,
                encoder_labels,
                decoder_labels,
                return_adata=True):
        """
            Reconstruct `adata` with `self.cvae_model`, using separate condition
            labels for the encoder and decoder inputs. NaNs in the
            reconstruction are replaced via `np.nan_to_num`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix; passed through `remove_sparsity` first.
                encoder_labels: numpy nd-array
                    labels that are one-hot encoded and fed as the encoder condition.
                decoder_labels: numpy nd-array
                    labels that are one-hot encoded and fed as the decoder condition.
                return_adata: bool
                    if `True`: the reconstruction is wrapped in an `AnnData` carrying
                        a deep copy of `adata.obs` and the `var_names` of `adata`.
                    if `False`: the raw reconstruction array is returned.
            # Returns
                output: `~anndata.AnnData` or numpy nd-array
                    first output of `self.cvae_model` for the given inputs.
        """
        adata = remove_sparsity(adata)

        n_classes = self.n_conditions
        enc_onehot = to_categorical(encoder_labels, num_classes=n_classes)
        dec_onehot = to_categorical(decoder_labels, num_classes=n_classes)

        # Index 0 of the model outputs is the reconstruction.
        model_outputs = self.cvae_model.predict(
            [adata.X, enc_onehot, dec_onehot])
        x_hat = np.nan_to_num(model_outputs[0])

        if not return_adata:
            return x_hat

        result = anndata.AnnData(X=x_hat)
        result.obs = adata.obs.copy(deep=True)
        result.var_names = adata.var_names
        return result
示例#12
0
    def predict(self, adata, target_label, condition_key, cell_type_key, cell_type_to_predict, source_condition,
                target_condition):
        """
            Reconstruct the source-condition cells of one cell type with
            `self.model_backend`, labelling every cell with `target_label`.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix; passed through `remove_sparsity` first.
                target_label:
                    value added to a zero vector to form the label of every selected cell.
                condition_key: str
                    name of the condition column in `adata.obs`.
                cell_type_key: str
                    name of the cell type column in `adata.obs`.
                cell_type_to_predict: str
                    value of `adata.obs[cell_type_key]` selecting the cell type.
                source_condition: str
                    value of `adata.obs[condition_key]` selecting the cells fed to the model.
                target_condition: str
                    only used to build the condition name written to the result's `obs`.
            # Returns
                pred_adata: `~anndata.AnnData`
                    first element of the backend's reconstruction (NaNs replaced) as `X`;
                    `obs[condition_key]` is "<cell_type_to_predict>_pred_<target_condition>".
        """
        adata = remove_sparsity(adata)

        # Narrow down to the requested cell type, then to its source condition.
        is_cell_type = adata.obs[cell_type_key] == cell_type_to_predict
        cell_type_adata = adata[is_cell_type]
        is_source = cell_type_adata.obs[condition_key] == source_condition
        source_adata = cell_type_adata[is_source]

        n_cells = source_adata.shape[0]
        target_labels = np.zeros(n_cells) + target_label
        loader = Loader(source_adata.X, labels=target_labels, shuffle=False)

        reconstruction = self.model_backend.get_reconstruction(loader)
        x_hat = np.nan_to_num(reconstruction[0])

        result = anndata.AnnData(X=x_hat)
        result.obs[condition_key] = f"{cell_type_to_predict}_pred_{target_condition}"
        result.var_names = adata.var_names
        return result
示例#13
0
    def train(self, train_data, validation_data=None,
              n_epochs=25,
              batch_size=32,
              early_stop_limit=20,
              threshold=0.0025,
              initial_run=True,
              shuffle=True,
              verbose=1,
              save=True,
              checkpoint=50,
              **kwargs):
        """
            Fit `self.vae_model` to reconstruct `train_data.X`, with early
            stopping on `val_loss` and CSV logging to "./csv_logger.log".
            # Parameters
                train_data: `~anndata.AnnData`
                    Annotated training data; optionally shuffled, then passed through `remove_sparsity`.
                validation_data: `~anndata.AnnData` or None
                    if given, it is passed through `remove_sparsity` and used as validation data.
                    if `None`, 20% of the training data is held out via `validation_split`.
                n_epochs: int
                    number of epochs passed to `fit`.
                batch_size: int
                    batch size passed to `fit`.
                early_stop_limit: int
                    `patience` of the `EarlyStopping` callback.
                threshold: float
                    `min_delta` of the `EarlyStopping` callback.
                initial_run: bool
                    if `True`, a "----Training----" message is logged first.
                shuffle: bool
                    if `True`, `train_data` is shuffled with `shuffle_data`; also passed to `fit`.
                verbose: int
                    verbosity passed to `fit`.
                save: bool
                    if exactly `True`, `self.save_model()` is called after fitting.
                checkpoint: int
                    accepted for interface compatibility; not used by this method.
                **kwargs:
                    accepted for interface compatibility; not used by this method.
            # Returns
                result:
                    the value returned by `self.vae_model.fit`.
        """
        if initial_run:
            log.info("----Training----")
        if shuffle:
            train_data = shuffle_data(train_data)

        train_data = remove_sparsity(train_data)

        callbacks = [
            EarlyStopping(patience=early_stop_limit, monitor='val_loss', min_delta=threshold),
            CSVLogger(filename="./csv_logger.log")
        ]

        # Arguments shared by both validation modes.
        fit_kwargs = dict(x=train_data.X,
                          y=train_data.X,
                          epochs=n_epochs,
                          batch_size=batch_size,
                          shuffle=shuffle,
                          callbacks=callbacks,
                          verbose=verbose)
        if validation_data is not None:
            # Treat the validation matrix the same way as the training matrix
            # so a sparse `X` is never handed to the model.
            validation_data = remove_sparsity(validation_data)
            fit_kwargs['validation_data'] = (validation_data.X, validation_data.X)
        else:
            fit_kwargs['validation_split'] = 0.2

        result = self.vae_model.fit(**fit_kwargs)

        if save is True:
            self.save_model()
        return result
示例#14
0
    def to_mmd_layer(self, adata, encoder_labels, decoder_labels):
        """
            Evaluate the `self.mmd_hl` tensor for `adata` in the model's session.
            # Parameters
                adata: `~anndata.AnnData`
                    Annotated data matrix to be mapped. `adata.X` has to be in shape [n_obs, n_vars].
                encoder_labels: numpy nd-array
                    labels fed to `self.encoder_labels`.
                decoder_labels: numpy nd-array
                    labels fed to `self.decoder_labels`.
            # Returns
                mmd_adata: `~anndata.AnnData`
                    the evaluated `self.mmd_hl` values as `X`, with a deep copy of `adata.obs`.
        """
        adata = remove_sparsity(adata)

        # Inference-mode feed: `size` is the number of observations.
        feed = {
            self.x: adata.X,
            self.encoder_labels: encoder_labels,
            self.decoder_labels: decoder_labels,
            self.size: adata.shape[0],
            self.is_training: False,
        }
        mmd_values = self.sess.run(self.mmd_hl, feed_dict=feed)

        result = anndata.AnnData(X=mmd_values)
        result.obs = adata.obs.copy(deep=True)
        return result
    input_shape = (64, 64, 3)
elif data_name == "mnist":
    conditions = ["normal", "thin", "thick"]
    target_conditions = ["thin", 'thick']
    source_condition = "normal"
    labelencoder = {"normal": 0, "thin": 1, "thick": 2}
    label_key = "labels"
    condition_key = "condition"
    specific_labels = [1, 3, 6, 7]
    arch_style = 1
    adata = sc.read("./data/thick_thin_mnist/thick_thin_mnist.h5ad")
    input_shape = (28, 28, 1)
else:
    raise Exception("Invalid data name")

# Run the loaded data through `remove_sparsity` before doing arithmetic on X.
adata = remove_sparsity(adata)
# Preprocessing: divide X in place by 255.
adata.X /= 255.0


train_adata, valid_adata = reptrvae.utils.train_test_split(adata, 0.80)

# From both splits, drop every sample whose label is in `specific_labels`
# AND whose condition is in `target_conditions`; the network only sees the rest.
net_train_adata = train_adata[~(
    train_adata.obs[label_key].isin(specific_labels)
    & train_adata.obs[condition_key].isin(target_conditions)
)]
net_valid_adata = valid_adata[~(
    valid_adata.obs[label_key].isin(specific_labels)
    & valid_adata.obs[condition_key].isin(target_conditions)
)]

network = reptrvae.models.DCtrVAE(x_dimension=input_shape,
                                  z_dimension=60,
                                  n_conditions=len(net_train_adata.obs[condition_key].unique()),
                                  alpha=5e-5,
示例#16
0
    def train(self,
              train_adata,
              valid_adata=None,
              condition_encoder=None,
              condition_key='condition',
              n_epochs=25,
              batch_size=32,
              early_stop_limit=20,
              lr_reducer=10,
              threshold=0.0025,
              monitor='val_loss',
              shuffle=True,
              verbose=2,
              save=True):
        """
            Trains `self.cvae_model` for up to `n_epochs` epochs on `train_adata`
            and validates it on `valid_adata` if given, otherwise on a 20% split
            of the training data. Early stopping and learning-rate reduction
            are attached as callbacks when their limits are positive.
            # Parameters
                train_adata: `~anndata.AnnData`
                    Annotated training data. `train_adata.X` is reshaped to `(-1, *self.x_dim)`.
                valid_adata: `~anndata.AnnData` or None
                    Annotated validation data, prepared the same way as the training data.
                condition_encoder:
                    encoder argument forwarded to `label_encoder`; the encoder it
                    returns for the training data is stored in `self.condition_encoder`.
                condition_key: str
                    forwarded to `label_encoder` to obtain the condition labels.
                n_epochs: int
                    number of epochs to iterate and optimize network weights
                batch_size: int
                    batch size passed to `fit`.
                early_stop_limit: int
                    `patience` of the `EarlyStopping` callback; the callback is
                    only added when this is greater than 0.
                lr_reducer: int
                    `patience` of the `ReduceLROnPlateau` callback; the callback is
                    only added when this is greater than 0.
                threshold: float
                    `min_delta` of the `EarlyStopping` callback.
                monitor: str
                    metric monitored by both `EarlyStopping` and `ReduceLROnPlateau`.
                shuffle: bool
                    passed to `fit`.
                verbose: int
                    if greater than 2: progress is reported through `print_message`
                        after every epoch and `fit` itself runs with verbosity 0.
                    otherwise: passed to `fit` unchanged.
                save: bool
                    if truthy, `self.save_model()` is called after fitting.
            # Returns
                Nothing will be returned
        """
        n_classes = self.n_conditions
        image_shape = (-1, *self.x_dim)

        train_adata = remove_sparsity(train_adata)

        train_codes, self.condition_encoder = label_encoder(
            train_adata, condition_encoder, condition_key)
        train_onehot = to_categorical(train_codes, num_classes=n_classes)

        # Callbacks are appended in a fixed order: history, CSV log, then the
        # optional early stopping, LR reduction and custom progress printer.
        callbacks = [
            History(),
            CSVLogger(filename="./csv_logger.log"),
        ]
        if early_stop_limit > 0:
            early_stopping = EarlyStopping(patience=early_stop_limit,
                                           monitor=monitor,
                                           min_delta=threshold)
            callbacks.append(early_stopping)

        if lr_reducer > 0:
            lr_schedule = ReduceLROnPlateau(monitor=monitor,
                                            patience=lr_reducer,
                                            verbose=verbose)
            callbacks.append(lr_schedule)

        fit_verbose = verbose
        if verbose > 2:
            def report_epoch(epoch, logs):
                return print_message(epoch, logs, n_epochs, verbose)

            callbacks.append(LambdaCallback(on_epoch_end=report_epoch))
            # The custom printer replaces the built-in progress output.
            fit_verbose = 0

        train_images = np.reshape(train_adata.X, image_shape)

        # The one-hot labels are fed twice (encoder and decoder condition).
        fit_kwargs = dict(
            x=[train_images, train_onehot, train_onehot],
            y=[train_images, train_codes],
            epochs=n_epochs,
            batch_size=batch_size,
            shuffle=shuffle,
            callbacks=callbacks,
            verbose=fit_verbose,
        )

        if valid_adata is None:
            fit_kwargs['validation_split'] = 0.2
        else:
            valid_adata = remove_sparsity(valid_adata)

            # NOTE(review): the validation labels are encoded with the
            # `condition_encoder` argument, not `self.condition_encoder` -
            # confirm both yield the same mapping when the argument is None.
            valid_codes, _ = label_encoder(valid_adata,
                                           condition_encoder,
                                           condition_key)
            valid_onehot = to_categorical(valid_codes, num_classes=n_classes)
            valid_images = np.reshape(valid_adata.X, image_shape)

            x_valid = [valid_images, valid_onehot, valid_onehot]
            y_valid = [valid_images, valid_codes]
            fit_kwargs['validation_data'] = (x_valid, y_valid)

        self.cvae_model.fit(**fit_kwargs)

        if save:
            self.save_model()