def test_all(self):
    """Smoke-test MultiNet: fit, predict with the "restore" policy, print metrics.

    The input is reduced to the 1300 labels whose 0.99 quantile is highest
    before training. Nothing is asserted; the test only checks that the
    whole pipeline runs.
    """
    expression = test_data.rawData
    # Rank by upper-tail quantile and keep the first 1300 labels.
    upper_quantile = expression.quantile(.99)
    kept = upper_quantile.sort_values(ascending=False).index[0:1300]
    expression = expression[kept]

    dense_layer = {"type": "dense", "activation": "relu", "neurons": 150}
    dropout_layer = {"type": "dropout", "activation": "dropout", "rate": 0.2}

    net = MultiNet(
        architecture=[dense_layer, dropout_layer],
        loss="wMSE",
        sub_outputdim=512,
        seed=123,
        ncores=2,
        verbose=1,
    )
    net.fit(expression)
    # The prediction itself is not inspected.
    net.predict(expression, policy="restore")
    print(net.test_metrics)
def di_impute(self, data):
    """Fit a new MultiNet on ``data`` and return its prediction for ``data``.

    The network is built with ``ncores=self.ncores``; every other setting is
    left at MultiNet's defaults.
    """
    # Imported lazily so deepimpute is only required when this method runs.
    from deepimpute.multinet import MultiNet

    net = MultiNet(ncores=self.ncores)
    net.fit(data)
    return net.predict(data)
def run_DI(raw):
    """Impute ``raw.X`` with MultiNet and wrap the result in a new AnnData.

    Observation names, variable names and the ``celltype`` annotation are
    copied over from ``raw``.
    """
    counts = pd.DataFrame(raw.X)

    # predict() is called on whatever fit() returns, as in the original chain.
    fitted = MultiNet(ncores=40).fit(counts)
    imputed = fitted.predict(counts)

    adata = sc.AnnData(imputed.values)
    adata.obs_names = raw.obs.index
    adata.var_names = raw.var.index
    adata.obs["celltype"] = raw.obs.celltype.values
    # NOTE(review): no return statement is visible here, so callers receive
    # None -- confirm whether `adata` was meant to be returned.
def deepImpute(data, NN_lim="auto", cell_subset=1, imputed_only=False, policy="restore", minVMR=0.5, **NN_params):
    """Train a MultiNet on ``data`` and return its prediction for ``data``.

    ``NN_lim``, ``cell_subset`` and ``minVMR`` are forwarded to ``fit``;
    ``imputed_only`` and ``policy`` are forwarded to ``predict``; any extra
    keyword arguments configure the MultiNet constructor.
    """
    from deepimpute.multinet import MultiNet

    fit_options = {"NN_lim": NN_lim, "cell_subset": cell_subset, "minVMR": minVMR}
    predict_options = {"imputed_only": imputed_only, "policy": policy}

    network = MultiNet(**NN_params)
    network.fit(data, **fit_options)
    return network.predict(data, **predict_options)
def deepImpute(data, NN_lim="auto", n_cores=10, cell_subset=None, imputed_only=False, policy="restore", seed=0, **NN_params):
    """Train a MultiNet on ``data`` and return its prediction for ``data``.

    ``n_cores``, ``seed`` and any extra keyword arguments go to the MultiNet
    constructor; ``NN_lim`` and ``cell_subset`` go to ``fit``;
    ``imputed_only`` and ``policy`` go to ``predict``.
    """
    network = MultiNet(n_cores=n_cores, seed=seed, **NN_params)

    fit_options = {"NN_lim": NN_lim, "cell_subset": cell_subset}
    network.fit(data, **fit_options)

    predict_options = {"imputed_only": imputed_only, "policy": policy}
    return network.predict(data, **predict_options)
def deepImpute(data, NN_lim="auto", n_cores=10, cell_subset=None, imputed_only=False, restore_pos_values=True, seed=0, **NN_params):
    """Train a MultiNet on ``data`` and return its prediction for ``data``.

    ``n_cores``, ``seed`` and any extra keyword arguments go to the MultiNet
    constructor; ``NN_lim`` and ``cell_subset`` go to ``fit``;
    ``imputed_only`` and ``restore_pos_values`` go to ``predict``.
    """
    network = MultiNet(n_cores=n_cores, seed=seed, **NN_params)

    fit_options = {"NN_lim": NN_lim, "cell_subset": cell_subset}
    network.fit(data, **fit_options)

    predict_options = {
        "imputed_only": imputed_only,
        "restore_pos_values": restore_pos_values,
    }
    return network.predict(data, **predict_options)
def deepImpute(**kwargs):
    """Run DeepImpute from parsed command-line options.

    Options come from ``parse_args()``; every keyword argument given here
    overrides the attribute of the same name. The input CSV is read with its
    first column as index and transposed when ``cell_axis`` is "columns".

    Returns:
        The imputed table when ``args.output`` is None. Otherwise the table
        is written to ``args.output`` as CSV and None is returned.
    """
    args = parse_args()
    for option in kwargs:
        setattr(args, option, kwargs[option])

    data = pd.read_csv(args.inputFile, index_col=0)
    if args.cell_axis == "columns":
        data = data.T

    multi = MultiNet(
        learning_rate=args.learning_rate,
        batch_size=args.batch_size,
        max_epochs=args.max_epochs,
        ncores=args.cores,
        sub_outputdim=args.output_neurons,
        architecture=[
            {
                "type": "dense",
                "activation": "relu",
                "neurons": args.hidden_neurons,
            },
            {
                "type": "dropout",
                "activation": "dropout",
                "rate": args.dropout_rate,
            },
        ],
    )
    multi.fit(
        data,
        NN_lim=args.limit,
        cell_subset=args.subset,
        minVMR=args.minVMR,
        n_pred=args.n_pred,
    )
    imputed = multi.predict(data, imputed_only=False, policy=args.policy)

    # Hand the result back only when no output path was requested.
    if args.output is None:
        return imputed
    imputed.to_csv(args.output)
def test_all(self):
    """Smoke-test MultiNet with a layer-list configuration and the wMSE loss.

    The input is reduced to the 900 labels whose 0.99 quantile is highest,
    then the model is fitted, used for prediction, and scored on that data.
    Nothing is asserted; the score is only printed.
    """
    expression = test_data.rawData
    # Rank by upper-tail quantile and keep the first 900 labels.
    upper_quantile = expression.quantile(.99)
    kept = upper_quantile.sort_values(ascending=False).index[0:900]
    expression = expression[kept]

    layer_stack = [
        {"label": "dense", "activation": "relu", "nb_neurons": 150},
        {"label": "dropout", "activation": "dropout", "rate": 0.2},
        {"label": "dense", "activation": "relu"},
    ]

    net = MultiNet(
        layers=layer_stack,
        loss="wMSE",
        optimizer="Adam",
        dims=[20, 500],
        preproc="log_or_exp",
        seed=1,
        ncores=4,
    )
    net.fit(expression)
    # The prediction itself is not inspected.
    net.predict(expression)
    print(net.score(expression))
def test_all(self):
    """Smoke-test MultiNet with mean-squared-error loss and AdamOptimizer.

    The input is reduced to the 900 labels whose 0.99 quantile is highest,
    then the model is fitted, used for prediction, and scored on that data.
    Nothing is asserted; the score is only printed.
    """
    expression = test_data.rawData
    # Rank by upper-tail quantile and keep the first 900 labels.
    upper_quantile = expression.quantile(.99)
    kept = upper_quantile.sort_values(ascending=False).index[0:900]
    expression = expression[kept]

    layer_stack = [
        {'label': 'dense', 'activation': 'relu', 'nb_neurons': 150},
        {'label': 'dropout', 'activation': 'dropout', 'rate': 0.2},
        {'label': 'dense', 'activation': 'relu'},
    ]

    net = MultiNet(
        layers=layer_stack,
        loss='mean_squared_error',
        optimizer='AdamOptimizer',
        dims=[20, 500],
        preproc='log_or_exp',
        seed=1,
        ncores=4,
    )
    net.fit(expression)
    # The prediction itself is not inspected.
    net.predict(expression)
    print(net.score(expression))
# Cell-subset benchmark (excerpt). Creates `outputdir` when missing, then for
# every cell ratio and iteration fits a MultiNet on `raw` with
# `cell_subset=cellRatio` unless the matching "deepImpute_<ratio>_<iter>.npy"
# file already exists, saving the imputed values and the imputed column labels.
# The saved labels are then used to subset `truth` and `mask` before scoring.
# NOTE(review): `imputed.columns` is written with np.save and read back with a
# plain np.load; if the labels end up stored as an object array, recent NumPy
# releases need allow_pickle=True to load them -- confirm.
# NOTE(review): the excerpt ends on an open `for` header; the loop body that
# computes the scores is not visible here.
if not os.path.exists(outputdir): os.mkdir(outputdir) #-------------------------# DeepImpute #-------------------------# cellRatios = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 0.9, 1] for cellRatio in cellRatios: for nb in range(n_iter): print("Cellratio: {} (iteration {})".format(cellRatio, nb)) output_name = "{}/deepImpute_{}_{}.npy".format(outputdir, cellRatio, nb) if not os.path.exists(output_name): model = MultiNet(ncores=ncores, verbose=0) model.fit(raw, cell_subset=cellRatio) imputed = model.predict(raw, imputed_only=True) np.save(output_name, imputed.values) np.save('{}/imputed_genes.npy'.format(outputdir), imputed.columns) imputed_genes = np.load('{}/imputed_genes.npy'.format(outputdir)) truth = truth[imputed_genes].values mask = mask[imputed_genes].values #-------------------------# MSE and Pearson #-------------------------# scores = [] for ratio in cellRatios: for nb in range(n_iter):
# Cache one DeepImpute prediction per dropout rate (`dp_rate`) on disk and
# register its log1p-transformed values in `imputed`, keyed by the rate.
filename = "{}/deepimpute_{:.1f}.npy".format(outputdir, dp_rate)
# Only `outputdir` appears in the template; the unused extra format argument
# was dropped so this line cannot fail on an undefined name.
geneFilename = "{}/gene_subset.npy".format(outputdir)

# Retrain when either cached file is missing: the predictions are unusable
# without the matching column labels.
if not (os.path.exists(filename) and os.path.exists(geneFilename)):
    architecture = [
        {"type": "dense", "neurons": 256, "activation": "relu"},
        {"type": "dropout", "rate": dp_rate},
    ]
    model = MultiNet(architecture=architecture, ncores=40)
    model.fit(raw)
    prediction = model.predict(raw, imputed_only=True)
    np.save(filename, prediction.values)
    np.save(geneFilename, prediction.columns)

# Column labels saved from a pandas Index are stored as a pickled object
# array; NumPy >= 1.16.3 refuses to load those unless allow_pickle is set.
# The file is produced by this script, so unpickling it is acceptable here.
gene_subset = np.load(geneFilename, allow_pickle=True)
imputed["{0:.1g}".format(dp_rate)] = pd.DataFrame(
    np.log1p(np.load(filename)), index=cells, columns=gene_subset
)

#------------------------# Import other data matrices #------------------------#
# Align the ground truth with the imputed labels before the log transform.
truth = np.log1p(truth.reindex(columns=gene_subset))
# Resource benchmark (excerpt). The visible entries close a parameter dict
# (presumably the `NN_params` literal passed to MultiNet below -- its opening
# is not visible here). `multinet.fit(tmp, cell_subset=1, minVMR=0.5)` is run
# through memory_usage(...) and bracketed by timestamp() / resource_usage()
# snapshots; `real`, `systime` and `usertime` are the differences between the
# snapshots taken before and after the fit. The predict() call happens after
# the second snapshot, so it is not included in those three figures.
'ncores': 8, 'sub_outputdim': outputdim, 'architecture': [{ "type": "dense", "activation": "relu", "neurons": intermediate }, { "type": "dropout", "activation": "dropout", "rate": 0.3 }] } multinet = MultiNet(**NN_params) start_time, start_resources = timestamp(), resource_usage(RUSAGE_SELF) mem_registered = memory_usage((multinet.fit, (tmp, ), { 'cell_subset': 1, 'minVMR': 0.5 }), retval=False, max_usage=True, include_children=True) end_resources, end_time = resource_usage(RUSAGE_SELF), timestamp() imputedData = multinet.predict(tmp) real = end_time - start_time systime = end_resources.ru_stime - start_resources.ru_stime usertime = end_resources.ru_utime - start_resources.ru_utime
# Load the PBMC table and keep only the rows whose 'Unnamed: 0' value matches
# a `name` in `lnc`.
PBMC = pd.read_csv('PBMC.tsv', sep='\t')
pbmcLNC = PBMC.merge(lnc, left_on='Unnamed: 0', right_on='name', how='inner')

# Bare expressions kept from the notebook export (cell previews).
PBMC.head(5)
PBMC.shape

# Index by the original first column and drop the annotation columns.
pbmcLNC = pbmcLNC.set_index('Unnamed: 0').drop(columns=['gene', 'name'])
pbmcLNC.shape

"""# Dropout Imputation"""

from deepimpute.multinet import MultiNet

model = MultiNet()

#transpose Data
pbmcLNC = pbmcLNC.T
print('Working on {} cells and {} genes'.format(*pbmcLNC.shape))

# Notebook preview: is any column label repeated?
pbmcLNC.columns.duplicated().any()


def get_duplicate_cols(df: pd.DataFrame) -> pd.Series:
    """Return the column labels of ``df`` that occur more than once, with counts."""
    label_counts = pd.Series(df.columns).value_counts()
    return label_counts[label_counts > 1]


get_duplicate_cols(pbmcLNC)

# Remove the listed labels before imputation.
pbmcLNC = pbmcLNC.drop(columns=['Y_RNA', 'Y_RNA', 'U1', '7SK'])
pbmcLNC.shape
# Load the FISH reference table and the raw Drop-seq table from one HDF5 file.
handle = h5py.File('{}/paper_data/FISH.h5'.format(PARENT_DIR))

fish = pd.DataFrame(
    handle.get('fish/data')[:],
    index=handle.get('fish/cells')[:].astype(str),
    columns=handle.get('fish/genes')[:].astype(str),
)

cells = handle.get('dropseq/cells')[:].astype(str)
genes = handle.get('dropseq/genes')[:].astype(str)
dropseq = pd.DataFrame(handle.get('dropseq/raw')[:], index=cells, columns=genes)

imputed_data = {}

#-------------------------# DeepImpute #-------------------------#
model = MultiNet(ncores=40)
model.fit(dropseq)
imputed = model.predict(dropseq, imputed_only=True)

# Only compare with imputed gene subset
genes_to_extract = np.intersect1d(imputed.columns, fish.columns)

#-------------------------# Load all imputation results #-------------------------#
print('Loading datasets')
# The first and last entries of `imputation_methods` are not read from the
# file; 'raw' is added from the Drop-seq table afterwards.
for method in imputation_methods[1:-1]:
    stored = pd.DataFrame(
        handle.get('imputed/{}'.format(method))[:],
        index=cells,
        columns=genes,
    )
    imputed_data[method] = stored[genes_to_extract]
imputed_data['raw'] = dropseq[genes_to_extract]