示例#1
0
    def test_all(self):
        """Smoke-test MultiNet: fit, predict with the "restore" policy, print metrics.

        Only the 1300 columns of ``test_data.rawData`` with the largest 99th
        percentile are used. Nothing is asserted; the test passes as long as
        fit/predict run without raising.
        """
        expression = test_data.rawData
        # Rank columns by their 99th percentile, highest first.
        ranked = expression.quantile(.99).sort_values(ascending=False)
        expression = expression[ranked.index[0:1300]]

        hyperparams = dict(
            architecture=[
                {"type": "dense", "activation": "relu", "neurons": 150},
                {"type": "dropout", "activation": "dropout", "rate": 0.2},
            ],
            loss="wMSE",
            sub_outputdim=512,
            seed=123,
            ncores=2,
            verbose=1,
        )

        model = MultiNet(**hyperparams)
        model.fit(expression)
        # The prediction itself is not inspected, only that it can be computed.
        model.predict(expression, policy="restore")

        print(model.test_metrics)
示例#2
0
    def di_impute(self, data):
        """Impute ``data`` with a freshly trained DeepImpute ``MultiNet``.

        The network is created with ``self.ncores`` cores, fitted on ``data``
        and then asked to predict on that same ``data``.

        Returns:
            Whatever ``MultiNet.predict`` returns for ``data``.
        """
        # Imported here so deepimpute is only needed when this method runs.
        from deepimpute.multinet import MultiNet

        network = MultiNet(ncores=self.ncores)
        network.fit(data)
        return network.predict(data)
示例#3
0
def run_DI(raw, ncores=40):
    """Impute ``raw.X`` with DeepImpute and wrap the result in a new AnnData.

    Args:
        raw: Object exposing ``X`` (expression matrix), ``obs`` (with a
            ``celltype`` column) and ``var``, whose indexes label the
            observations and variables.
        ncores: Number of cores handed to ``MultiNet`` (defaults to the
            previously hard-coded 40).

    Returns:
        An ``AnnData`` holding the imputed values, carrying over the
        observation names, variable names and ``celltype`` annotation of
        ``raw``.
    """
    df = pd.DataFrame(raw.X)

    model = MultiNet(ncores=ncores)
    # Call fit and predict separately so this does not depend on ``fit``
    # returning the model instance.
    model.fit(df)
    imputed = model.predict(df)

    adata = sc.AnnData(imputed.values)
    adata.obs_names = raw.obs.index
    adata.var_names = raw.var.index
    adata.obs["celltype"] = raw.obs.celltype.values
    # NOTE(review): the visible code built ``adata`` and then dropped it;
    # returning it is assumed to be the intent - confirm against callers.
    return adata
示例#4
0
def deepImpute(data,
               NN_lim="auto",
               cell_subset=1,
               imputed_only=False,
               policy="restore",
               minVMR=0.5,
               **NN_params):
    """Fit a ``MultiNet`` on ``data`` and return its prediction for ``data``.

    Args:
        data: Input passed unchanged to both ``fit`` and ``predict``.
        NN_lim, cell_subset, minVMR: Forwarded to ``MultiNet.fit``.
        imputed_only, policy: Forwarded to ``MultiNet.predict``.
        **NN_params: Constructor arguments for ``MultiNet``.

    Returns:
        The value returned by ``MultiNet.predict``.
    """
    from deepimpute.multinet import MultiNet

    fit_options = {
        "NN_lim": NN_lim,
        "cell_subset": cell_subset,
        "minVMR": minVMR,
    }
    predict_options = {"imputed_only": imputed_only, "policy": policy}

    network = MultiNet(**NN_params)
    network.fit(data, **fit_options)
    return network.predict(data, **predict_options)
示例#5
0
def deepImpute(data,
               NN_lim="auto",
               n_cores=10,
               cell_subset=None,
               imputed_only=False,
               policy="restore",
               seed=0,
               **NN_params):
    """Train a ``MultiNet`` on ``data`` and return its prediction for ``data``.

    Args:
        data: Input passed unchanged to both ``fit`` and ``predict``.
        NN_lim, cell_subset: Forwarded to ``MultiNet.fit``.
        n_cores, seed: Forwarded to the ``MultiNet`` constructor.
        imputed_only, policy: Forwarded to ``MultiNet.predict``.
        **NN_params: Additional ``MultiNet`` constructor arguments.

    Returns:
        The value returned by ``MultiNet.predict``.
    """
    network = MultiNet(n_cores=n_cores, seed=seed, **NN_params)
    network.fit(data, NN_lim=NN_lim, cell_subset=cell_subset)
    imputed = network.predict(data, imputed_only=imputed_only, policy=policy)
    return imputed
示例#6
0
def deepImpute(data,
               NN_lim="auto",
               n_cores=10,
               cell_subset=None,
               imputed_only=False,
               restore_pos_values=True,
               seed=0,
               **NN_params):
    """Train a ``MultiNet`` on ``data`` and return its prediction for ``data``.

    Args:
        data: Input passed unchanged to both ``fit`` and ``predict``.
        NN_lim, cell_subset: Forwarded to ``MultiNet.fit``.
        n_cores, seed: Forwarded to the ``MultiNet`` constructor.
        imputed_only, restore_pos_values: Forwarded to ``MultiNet.predict``.
        **NN_params: Additional ``MultiNet`` constructor arguments.

    Returns:
        The value returned by ``MultiNet.predict``.
    """
    predict_options = {
        "imputed_only": imputed_only,
        "restore_pos_values": restore_pos_values,
    }

    network = MultiNet(n_cores=n_cores, seed=seed, **NN_params)
    network.fit(data, NN_lim=NN_lim, cell_subset=cell_subset)
    return network.predict(data, **predict_options)
示例#7
0
def deepImpute(**kwargs):
    """Run DeepImpute from parsed arguments, optionally overridden by ``kwargs``.

    The input CSV named by ``args.inputFile`` is read with its first column
    as index and transposed when ``args.cell_axis`` is ``"columns"``. A
    ``MultiNet`` is then configured from the arguments, fitted and used to
    predict on the same data.

    Args:
        **kwargs: Attribute overrides applied on top of ``parse_args()``.

    Returns:
        The imputed result when ``args.output`` is ``None``. Otherwise the
        result is written to ``args.output`` as CSV and ``None`` is returned.
    """
    args = parse_args()

    # Explicit keyword arguments win over whatever was parsed.
    for name, override in kwargs.items():
        setattr(args, name, override)

    data = pd.read_csv(args.inputFile, index_col=0)
    if args.cell_axis == "columns":
        data = data.T

    NN_params = dict(
        learning_rate=args.learning_rate,
        batch_size=args.batch_size,
        max_epochs=args.max_epochs,
        ncores=args.cores,
        sub_outputdim=args.output_neurons,
        architecture=[
            {
                "type": "dense",
                "activation": "relu",
                "neurons": args.hidden_neurons,
            },
            {
                "type": "dropout",
                "activation": "dropout",
                "rate": args.dropout_rate,
            },
        ],
    )

    multi = MultiNet(**NN_params)
    multi.fit(
        data,
        NN_lim=args.limit,
        cell_subset=args.subset,
        minVMR=args.minVMR,
        n_pred=args.n_pred,
    )
    imputed = multi.predict(data, imputed_only=False, policy=args.policy)

    if args.output is None:
        return imputed
    # With an output path the result is only written, not returned.
    imputed.to_csv(args.output)
示例#8
0
    def test_all(self):
        """Smoke-test MultiNet: fit, predict and print the score on the same data.

        Only the 900 columns of ``test_data.rawData`` with the largest 99th
        percentile are used. Nothing is asserted; the test passes as long as
        the calls run without raising.
        """
        expression = test_data.rawData
        # Rank columns by their 99th percentile, highest first.
        ranked = expression.quantile(.99).sort_values(ascending=False)
        expression = expression[ranked.index[0:900]]

        hyperparams = dict(
            layers=[
                {"label": "dense", "activation": "relu", "nb_neurons": 150},
                {"label": "dropout", "activation": "dropout", "rate": 0.2},
                {"label": "dense", "activation": "relu"},
            ],
            loss="wMSE",
            optimizer="Adam",
            dims=[20, 500],
            preproc="log_or_exp",
            seed=1,
            ncores=4,
        )

        model = MultiNet(**hyperparams)
        model.fit(expression)
        # The prediction itself is not inspected, only that it can be computed.
        model.predict(expression)

        print(model.score(expression))
示例#9
0
    def test_all(self):
        """Smoke-test MultiNet with an MSE loss: fit, predict and print the score.

        Only the 900 columns of ``test_data.rawData`` with the largest 99th
        percentile are used. Nothing is asserted; the test passes as long as
        the calls run without raising.
        """
        expression = test_data.rawData
        # Rank columns by their 99th percentile, highest first.
        ranked = expression.quantile(.99).sort_values(ascending=False)
        expression = expression[ranked.index[0:900]]

        hidden = {'label': 'dense', 'activation': 'relu', 'nb_neurons': 150}
        dropout = {'label': 'dropout', 'activation': 'dropout', 'rate': 0.2}
        output = {'label': 'dense', 'activation': 'relu'}

        hyperparams = {
            'layers': [hidden, dropout, output],
            'loss': 'mean_squared_error',
            'optimizer': 'AdamOptimizer',
            'dims': [20, 500],
            'preproc': 'log_or_exp',
            'seed': 1,
            'ncores': 4,
        }

        model = MultiNet(**hyperparams)
        model.fit(expression)
        # The prediction itself is not inspected, only that it can be computed.
        model.predict(expression)

        print(model.score(expression))
示例#10
0
# Create the output directory (including missing parents). exist_ok avoids
# the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(outputdir, exist_ok=True)

#-------------------------# DeepImpute #-------------------------#

# Fractions passed to ``fit`` as ``cell_subset``; each is run ``n_iter`` times.
cellRatios = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 0.9, 1]

for cellRatio in cellRatios:
    for nb in range(n_iter):
        print("Cellratio: {} (iteration {})".format(cellRatio, nb))

        output_name = "{}/deepImpute_{}_{}.npy".format(outputdir, cellRatio,
                                                       nb)

        # Results are cached on disk: a run is skipped when its file exists.
        if not os.path.exists(output_name):
            model = MultiNet(ncores=ncores, verbose=0)
            model.fit(raw, cell_subset=cellRatio)
            imputed = model.predict(raw, imputed_only=True)
            np.save(output_name, imputed.values)
            # NOTE(review): this file is rewritten by every run, so all runs
            # are assumed to impute the same gene set - confirm.
            np.save('{}/imputed_genes.npy'.format(outputdir), imputed.columns)

# Column labels are typically stored as an object-dtype array, which np.load
# refuses to read unless pickling is explicitly allowed. The file is written
# by this script itself, so unpickling it is not an untrusted-input concern.
imputed_genes = np.load('{}/imputed_genes.npy'.format(outputdir),
                        allow_pickle=True)
truth = truth[imputed_genes].values
mask = mask[imputed_genes].values

#-------------------------# MSE and Pearson #-------------------------#

scores = []

for ratio in cellRatios:
    for nb in range(n_iter):
示例#11
0
    filename = "{}/deepimpute_{:.1f}.npy".format(outputdir, dp_rate)
    geneFilename = "{}/gene_subset.npy".format(outputdir, dataset)

    if not os.path.exists(filename):
        architecture = [
            {
                "type": "dense",
                "neurons": 256,
                "activation": "relu"
            },
            {
                "type": "dropout",
                "rate": dp_rate
            },
        ]
        model = MultiNet(architecture=architecture, ncores=40)
        model.fit(raw)
        prediction = model.predict(raw, imputed_only=True)

        np.save(filename, prediction.values)
        np.save(geneFilename, prediction.columns)

    gene_subset = np.load(geneFilename)
    imputed["{0:.1g}".format(dp_rate)] = pd.DataFrame(np.log1p(
        np.load(filename)),
                                                      index=cells,
                                                      columns=gene_subset)

#------------------------# Import other data matrices #------------------------#

truth = np.log1p(truth.reindex(columns=gene_subset))
示例#12
0
        'ncores':
        8,
        'sub_outputdim':
        outputdim,
        'architecture': [{
            "type": "dense",
            "activation": "relu",
            "neurons": intermediate
        }, {
            "type": "dropout",
            "activation": "dropout",
            "rate": 0.3
        }]
    }

    # Build the network from the parameters assembled above.
    multinet = MultiNet(**NN_params)

    # Snapshot wall-clock time and resource usage immediately before fitting.
    # NOTE(review): presumably `timestamp` is time.time and `resource_usage`
    # is resource.getrusage - confirm against the file's imports.
    start_time, start_resources = timestamp(), resource_usage(RUSAGE_SELF)
    # Run multinet.fit(tmp, cell_subset=1, minVMR=0.5) under memory_usage,
    # passed as a (callable, args, kwargs) tuple.
    # NOTE(review): looks like memory_profiler.memory_usage, in which case
    # max_usage=True reports peak usage and include_children=True counts
    # child processes - confirm.
    mem_registered = memory_usage((multinet.fit, (tmp, ), {
        'cell_subset': 1,
        'minVMR': 0.5
    }),
                                  retval=False,
                                  max_usage=True,
                                  include_children=True)
    # Snapshot again right after fitting; predict below is deliberately
    # outside the measured window.
    end_resources, end_time = resource_usage(RUSAGE_SELF), timestamp()
    imputedData = multinet.predict(tmp)

    # Elapsed time of the fit, plus the ru_stime / ru_utime deltas reported
    # by the resource snapshots.
    real = end_time - start_time
    systime = end_resources.ru_stime - start_resources.ru_stime
    usertime = end_resources.ru_utime - start_resources.ru_utime
# Load the PBMC table from a tab-separated file.
PBMC = pd.read_csv('PBMC.tsv', sep='\t')

# Inner-join PBMC with `lnc` (defined elsewhere), matching PBMC's
# 'Unnamed: 0' column against lnc's 'name' column; only rows present in both
# tables are kept.
pbmcLNC = pd.merge(PBMC, lnc, left_on='Unnamed: 0', right_on='name', how='inner')

# Notebook-style inspection: these bare expressions display a value in a
# notebook and have no effect when the file runs as a script.
PBMC.head(5)

PBMC.shape

# Index the merged table by the 'Unnamed: 0' column and drop the 'gene' and
# 'name' columns.
pbmcLNC = pbmcLNC.set_index('Unnamed: 0')
pbmcLNC = pbmcLNC.drop(['gene','name'], axis=1)
pbmcLNC.shape

"""# Dropout Imputation"""

# Create a MultiNet with its default settings.
from deepimpute.multinet import MultiNet
model = MultiNet()

# Transpose so that, per the message printed below, rows are cells and
# columns are genes.
pbmcLNC = pbmcLNC.transpose()
print('Working on {} cells and {} genes'.format(*pbmcLNC.shape))

# Check whether any column label is duplicated (the result is only displayed
# in a notebook; it is not stored).
True in pbmcLNC.columns.duplicated()

def get_duplicate_cols(df: pd.DataFrame) -> pd.Series:
    """Return how often each repeated column label of ``df`` occurs.

    Labels that appear exactly once are left out, so the result is empty
    when all column labels are unique.
    """
    label_counts = pd.Series(df.columns).value_counts()
    repeated = label_counts > 1
    return label_counts[repeated]

# Show which column labels occur more than once (notebook-style output; the
# result is not stored).
get_duplicate_cols(pbmcLNC)

# Drop every column carrying one of the listed labels ('Y_RNA' is listed
# twice).
# NOTE(review): presumably these are the duplicated labels reported by the
# call above for this particular dataset - confirm before reusing elsewhere.
pbmcLNC = pbmcLNC.drop(['Y_RNA', 'Y_RNA', 'U1', '7SK'], axis=1)

# Notebook-style check of the remaining shape.
pbmcLNC.shape
示例#14
0
# Open the archive read-only: nothing below writes to it, and an explicit
# mode avoids relying on h5py's version-dependent default.
handle = h5py.File('{}/paper_data/FISH.h5'.format(PARENT_DIR), 'r')

# FISH matrix, with rows labelled by 'fish/cells' and columns by 'fish/genes'.
fish = pd.DataFrame(handle.get('fish/data')[:],
                    index=handle.get('fish/cells')[:].astype(str),
                    columns=handle.get('fish/genes')[:].astype(str))

# Raw Drop-seq matrix; its cell and gene labels are reused for every imputed
# matrix loaded further down.
cells = handle.get('dropseq/cells')[:].astype(str)
genes = handle.get('dropseq/genes')[:].astype(str)
dropseq = pd.DataFrame(handle.get('dropseq/raw')[:],
                       index=cells,
                       columns=genes)

#-------------------------# DeepImpute #-------------------------#

model = MultiNet(ncores=40)
model.fit(dropseq)
imputed = model.predict(dropseq, imputed_only=True)

# Only compare with imputed gene subset
genes_to_extract = np.intersect1d(imputed.columns, fish.columns)

#-------------------------# Load all imputation results #-------------------------#
print('Loading datasets')
# One frame per stored method, restricted to the genes shared with FISH. The
# first and last entries of `imputation_methods` are skipped here.
imputed_data = {
    method: pd.DataFrame(handle.get('imputed/{}'.format(method))[:],
                         index=cells,
                         columns=genes)[genes_to_extract]
    for method in imputation_methods[1:-1]
}
imputed_data['raw'] = dropseq[genes_to_extract]