def test_normalizer(self):
    data = np.ones([3, 5])
    data[0, 2] = 9

    Norm = Normalizer(factorFn=np.sum, activations=[np.exp, np.log])
    Norm.fit(data)
    # print(data)

    data_df = pd.DataFrame(
        data,
        index=['r' + str(ii) for ii in range(data.shape[0])],
        columns=['c' + str(ii) for ii in range(data.shape[1])])

    data_norm = Norm.transform(data_df)
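# A minimal follow-up sketch (not part of the original suite), assuming, as the
# test above implies, that Normalizer.transform() accepts a labelled DataFrame
# and returns an array-like of the same shape. The method name and the assertion
# below are illustrative, not the project's API.
def test_normalizer_shape_sketch(self):
    data = np.random.rand(4, 6) + 1.0
    data_df = pd.DataFrame(
        data,
        index=['r' + str(ii) for ii in range(data.shape[0])],
        columns=['c' + str(ii) for ii in range(data.shape[1])])

    norm = Normalizer(factorFn=np.sum, activations=[np.exp, np.log])
    norm.fit(data_df)

    data_norm = norm.transform(data_df)
    # Normalization should preserve the dimensions of the matrix
    assert np.asarray(data_norm).shape == data_df.shape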
def __init__(self, n_cores=4, predictorLimit=10, preproc='log_or_exp',
             runDir=os.path.join(tempfile.gettempdir(), 'run'),
             seed=0, **NN_params):
    self._maxcores = n_cores
    self.predictorLimit = predictorLimit
    self.norm = Normalizer.fromName(preproc)
    self.runDir = runDir
    self.seed = seed

    self.NN_params = NN_params
    self.NN_params['seed'] = seed
    if 'dims' not in self.NN_params:
        self.NN_params['dims'] = [20, 500]
def predict(self, data, imputed_only=False, policy="restore"):
    print("Starting prediction")

    df = pd.DataFrame(data)
    normalizer = Normalizer.fromName(self.norm)

    """ Create memory chunk and put the matrix in it """
    idx, cols = df.index, df.columns
    df_norm = normalizer.fit(df).transform(df)

    """ Parallelize process with shared array """
    childJobs = [((12, 15), net.__dict__, (idx, cols), "predict")
                 for net in self.networks]

    output_dicts = self._runOnMultipleCores(self.maxcores,
                                            df_norm.values.flatten(),
                                            childJobs)

    # Average the predictions of sub-networks that share target genes
    Y_imputed = pd.concat(output_dicts, axis=1)
    Y_imputed = Y_imputed.groupby(by=Y_imputed.columns, axis=1).mean()

    # Cap predictions at the maximum of the normalized input, then reverse the normalization
    Y_imputed = Y_imputed.mask(Y_imputed > df_norm.values.max(),
                               df_norm[Y_imputed.columns])
    Y_imputed = normalizer.transform(Y_imputed, rev=True)

    # Re-assemble imputed and untouched genes in the original column order
    Y_not_imputed = df.drop(Y_imputed.columns, axis=1)
    Y_total = pd.concat([Y_imputed, Y_not_imputed], axis=1)[df.columns]

    if policy == "restore":
        # Keep every measured (non-zero) value from the input
        Y_total = Y_total.mask(df > 0, df)
    elif policy == "max":
        # Element-wise maximum of input and imputed values
        Y_total = pd.concat([Y_total, df]).max(level=0)
    else:
        # Only fill entries that are still zero after imputation
        Y_total = Y_total.mask(Y_total == 0, df)

    if imputed_only:
        Y_total = Y_total[Y_imputed.columns]

    if isinstance(data, pd.DataFrame):
        return Y_total
    return Y_total.values
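# A self-contained sketch of how the three merge policies in predict() behave on a
# toy matrix, using plain pandas; `_policy_demo` is an illustrative helper and not
# part of the class. The groupby(level=0).max() call is equivalent to the
# pd.concat(...).max(level=0) used above.
def _policy_demo():
    df = pd.DataFrame([[0.0, 2.0], [3.0, 0.0]], columns=["g1", "g2"])  # raw input
    y = pd.DataFrame([[1.0, 1.5], [2.5, 0.0]], columns=["g1", "g2"])   # imputed

    # "restore": every measured (non-zero) input value wins over the imputation
    restore = y.mask(df > 0, df)
    # "max": element-wise maximum of input and imputed values
    maximum = pd.concat([y, df]).groupby(level=0).max()
    # default: only entries still at zero after imputation are taken from the input
    fill_zeros = y.mask(y == 0, df)
    return restore, maximum, fill_zeros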
def __init__(self, n_cores=4, predictorLimit=10, preproc="log_or_exp", runDir=os.path.join(tempfile.gettempdir(), "run"), seed=0, **NN_params): self._maxcores = n_cores self.predictorLimit = predictorLimit self.inOutGenes = None self.norm = Normalizer.fromName(preproc) self.runDir = runDir self.seed = seed NN_params["seed"] = seed if "dims" not in NN_params.keys(): NN_params["dims"] = [20, 500] self.NN_params = NN_params self.trainingParams = None self._minExpressionLevel = NN_params[ 'minExpressionLevel'] if 'minExpressionLevel' in NN_params else 5
def fit(self, data, NN_lim="auto", cell_subset=None, NN_genes=None,
        retrieve_training=False):
    np.random.seed(seed=self.seed)

    targetGeneNames = NN_genes

    inputExpressionMatrixDF = pd.DataFrame(data)
    print("Input dataset is {} genes (columns) and {} cells (rows)".format(
        inputExpressionMatrixDF.shape[1], inputExpressionMatrixDF.shape[0]))
    print("First 3 rows and columns:")
    print(inputExpressionMatrixDF.iloc[0:3, 0:3])

    self._setIDandRundir(inputExpressionMatrixDF)

    # Change the output dimension if the data has too few genes
    if inputExpressionMatrixDF.shape[1] < self.NN_params["dims"][1]:
        self.NN_params["dims"][1] = inputExpressionMatrixDF.shape[1]
    subnetOutputColumns = self.NN_params["dims"][1]

    # Choose genes to impute
    # geneCounts = inputExpressionMatrixDF.sum().sort_values(ascending=False)
    geneQuantiles = inputExpressionMatrixDF.quantile(.99).sort_values(
        ascending=False)

    if targetGeneNames is None:
        targetGeneNames = _get_target_genes(
            geneQuantiles,
            minExpressionLevel=self._minExpressionLevel,
            maxNumOfGenes=NN_lim)

    df_to_impute = inputExpressionMatrixDF[targetGeneNames]

    numberOfTargetGenes = len(targetGeneNames)
    if numberOfTargetGenes == 0:
        raise Exception(
            "Unable to compute any target genes. Is your data log transformed? "
            "Perhaps try with a lower minExpressionLevel.")

    n_runs, n_cores = self._getRunsAndCores(numberOfTargetGenes)

    # ------------------------# Subnetworks #------------------------#
    # Partition the target genes into output layers of `subnetOutputColumns` genes each
    n_choose = int(numberOfTargetGenes / subnetOutputColumns)

    subGenelists = np.random.choice(targetGeneNames,
                                    [n_choose, subnetOutputColumns],
                                    replace=False).tolist()

    if n_choose < n_runs:
        # Special case: for the last run, the output layer will have previous targets
        selectedGenes = np.reshape(subGenelists, -1)
        leftOutGenes = np.setdiff1d(targetGeneNames, selectedGenes)
        fill_genes = np.random.choice(targetGeneNames,
                                      subnetOutputColumns - len(leftOutGenes),
                                      replace=False)
        subGenelists.append(
            np.concatenate([leftOutGenes, fill_genes]).tolist())

    # ------------------------# Extracting input genes #------------------------#
    # Use 1 - |Pearson correlation| as the distance between genes
    corrMatrix = 1 - np.abs(
        pd.DataFrame(np.corrcoef(df_to_impute.T),
                     index=targetGeneNames,
                     columns=targetGeneNames))

    if self.inOutGenes is None:
        self.inOutGenes = get_input_genes(
            df_to_impute,
            self.NN_params["dims"],
            distanceMatrix=corrMatrix,
            targets=subGenelists,
            # predictorDropoutLimit=self.predictorDropoutLimit
        )

    # ------------------------# Subsets for fitting #------------------------#
    n_cells = df_to_impute.shape[0]

    if type(cell_subset) is float or cell_subset == 1:
        n_cells = int(cell_subset * n_cells)
    elif type(cell_subset) is int:
        n_cells = cell_subset

    self.trainCells = df_to_impute.sample(n_cells, replace=False).index

    print(
        "Starting training with {} cells ({:.1%}) on {} threads ({} cores/thread)."
        .format(n_cells,
                1. * n_cells / df_to_impute.shape[0],
                n_cores,
                self.NN_params["n_cores"]))

    if self.trainingParams is None:
        self.trainingParams = [self.NN_params] * len(self.inOutGenes)

    # -------------------# Preprocessing (if any) #--------------------#
    normalizer = Normalizer.fromName(self.norm)
    df_to_impute = normalizer.fit(df_to_impute).transform(df_to_impute)

    # -------------------# Share matrix between subprocesses #--------------------#
    """ Create memory chunk and put the matrix in it """
    idx, cols = self.trainCells, df_to_impute.columns
    trainData = df_to_impute.loc[self.trainCells, :].values

    """ Parallelize process with shared array """
    childJobs = [(in_out, trainingParams, (idx, cols), "train", retrieve_training)
                 for in_out, trainingParams in zip(self.inOutGenes,
                                                   self.trainingParams)]

    self.trainingParams = self._runOnMultipleCores(n_cores,
                                                   trainData.flatten(),
                                                   childJobs)

    # Rebuild one Net per sub-network from the returned training parameters
    self.networks = []
    for dictionary in self.trainingParams:
        self.networks.append(Net(**dictionary))

    print('---- Hyperparameters summary ----')
    self.networks[0].display_params()

    return self
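# A hedged end-to-end usage sketch of the fit/predict cycle defined above. The class
# name `MultiNet` is an assumption (the class statement is not shown in this excerpt),
# and the matrix sizes, Poisson counts and parameter values are illustrative only.
def _usage_sketch():
    np.random.seed(0)
    # Toy dataset: 200 cells (rows) x 50 genes (columns) of sparse, non-negative counts
    raw = pd.DataFrame(np.random.poisson(0.5, size=(200, 50)),
                       columns=["gene{}".format(ii) for ii in range(50)])

    model = MultiNet(n_cores=2, seed=0, minExpressionLevel=1)
    model.fit(raw, cell_subset=1)                    # train on all cells
    imputed = model.predict(raw, policy="restore")   # measured values are kept
    return imputed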