示例#1
0
    def di_impute(self, data):
        """Impute ``data`` with a freshly trained DeepImpute ``MultiNet``.

        A new network is created with ``self.ncores``, fitted on ``data`` and
        then asked to predict on that same ``data``; the prediction is
        returned unchanged.
        """
        # Function-scope import: deepimpute is loaded when this method runs,
        # not when the enclosing module is imported.
        from deepimpute.multinet import MultiNet

        network = MultiNet(ncores=self.ncores)
        network.fit(data)
        return network.predict(data)
示例#2
0
    def test_all(self):
        """Fit and predict with MultiNet on a column subset of the test data.

        The 1300 columns of ``test_data.rawData`` with the highest
        99th-percentile value are kept, a network is trained on them, a
        prediction is run with ``policy="restore"`` and the model's
        ``test_metrics`` are printed.
        """
        expression = test_data.rawData
        # Rank columns by their 99th percentile, largest first.
        ranked = expression.quantile(.99).sort_values(ascending=False)
        expression = expression[ranked.index[0:1300]]

        dense_layer = dict(type="dense", activation="relu", neurons=150)
        dropout_layer = dict(type="dropout", activation="dropout", rate=0.2)

        model = MultiNet(
            architecture=[dense_layer, dropout_layer],
            loss="wMSE",
            sub_outputdim=512,
            seed=123,
            ncores=2,
            verbose=1,
        )
        model.fit(expression)
        # The prediction itself is not inspected; the call only has to succeed.
        model.predict(expression, policy="restore")

        print(model.test_metrics)
示例#3
0
def deepImpute(data,
               NN_lim="auto",
               cell_subset=1,
               imputed_only=False,
               policy="restore",
               minVMR=0.5,
               **NN_params):
    """Train a ``MultiNet`` on ``data`` and return its prediction for ``data``.

    ``NN_params`` are forwarded to the ``MultiNet`` constructor; ``NN_lim``,
    ``cell_subset`` and ``minVMR`` are forwarded to ``fit``; ``imputed_only``
    and ``policy`` are forwarded to ``predict``.
    """
    from deepimpute.multinet import MultiNet

    fit_options = dict(NN_lim=NN_lim, cell_subset=cell_subset, minVMR=minVMR)
    predict_options = dict(imputed_only=imputed_only, policy=policy)

    network = MultiNet(**NN_params)
    network.fit(data, **fit_options)
    return network.predict(data, **predict_options)
示例#4
0
def deepImpute(data,
               NN_lim="auto",
               n_cores=10,
               cell_subset=None,
               imputed_only=False,
               policy="restore",
               seed=0,
               **NN_params):
    """Fit a ``MultiNet`` on ``data`` and return its prediction for ``data``.

    ``n_cores``, ``seed`` and any extra ``NN_params`` configure the network;
    ``NN_lim`` and ``cell_subset`` are passed to ``fit``; ``imputed_only`` and
    ``policy`` are passed to ``predict``.
    """
    network_options = dict(n_cores=n_cores, seed=seed, **NN_params)

    network = MultiNet(**network_options)
    network.fit(data, NN_lim=NN_lim, cell_subset=cell_subset)

    prediction = network.predict(data,
                                 imputed_only=imputed_only,
                                 policy=policy)
    return prediction
示例#5
0
def deepImpute(data,
               NN_lim="auto",
               n_cores=10,
               cell_subset=None,
               imputed_only=False,
               restore_pos_values=True,
               seed=0,
               **NN_params):
    """Fit a ``MultiNet`` on ``data`` and return its prediction for ``data``.

    ``n_cores``, ``seed`` and any extra ``NN_params`` configure the network;
    ``NN_lim`` and ``cell_subset`` are passed to ``fit``; ``imputed_only`` and
    ``restore_pos_values`` are passed to ``predict``.
    """
    network = MultiNet(n_cores=n_cores, seed=seed, **NN_params)
    network.fit(data, NN_lim=NN_lim, cell_subset=cell_subset)

    predict_options = dict(imputed_only=imputed_only,
                           restore_pos_values=restore_pos_values)
    return network.predict(data, **predict_options)
示例#6
0
def deepImpute(**kwargs):
    """Run DeepImpute from parsed arguments, with keyword overrides.

    Arguments come from ``parse_args()``; every entry of ``kwargs`` is then
    set as an attribute on the parsed object, replacing the parsed value.
    The CSV at ``args.inputFile`` is read (first column as index) and
    transposed when ``args.cell_axis == "columns"``.

    Returns the imputed frame when ``args.output`` is ``None``; otherwise the
    frame is written to ``args.output`` as CSV and ``None`` is returned.
    """
    args = parse_args()
    for name in kwargs:
        setattr(args, name, kwargs[name])

    data = pd.read_csv(args.inputFile, index_col=0)
    if args.cell_axis == "columns":
        data = data.T

    multi = MultiNet(
        learning_rate=args.learning_rate,
        batch_size=args.batch_size,
        max_epochs=args.max_epochs,
        ncores=args.cores,
        sub_outputdim=args.output_neurons,
        architecture=[
            dict(type="dense", activation="relu", neurons=args.hidden_neurons),
            dict(type="dropout", activation="dropout", rate=args.dropout_rate),
        ],
    )
    multi.fit(
        data,
        NN_lim=args.limit,
        cell_subset=args.subset,
        minVMR=args.minVMR,
        n_pred=args.n_pred,
    )

    imputed = multi.predict(data, imputed_only=False, policy=args.policy)

    # Without an output path the result is handed back to the caller;
    # with one it is only written to disk.
    if args.output is None:
        return imputed
    imputed.to_csv(args.output)
示例#7
0
    def test_all(self):
        """Fit, predict and score MultiNet on a column subset of the test data.

        The 900 columns of ``test_data.rawData`` with the highest
        99th-percentile value are kept; the model is trained with the wMSE
        loss and the Adam optimizer, and its score on the subset is printed.
        """
        expression = test_data.rawData
        # Rank columns by their 99th percentile, largest first.
        per_column_q99 = expression.quantile(.99)
        top_columns = per_column_q99.sort_values(ascending=False).index[0:900]
        expression = expression[top_columns]

        layers = [
            dict(label="dense", activation="relu", nb_neurons=150),
            dict(label="dropout", activation="dropout", rate=0.2),
            dict(label="dense", activation="relu"),
        ]

        model = MultiNet(
            layers=layers,
            loss="wMSE",
            optimizer="Adam",
            dims=[20, 500],
            preproc="log_or_exp",
            seed=1,
            ncores=4,
        )
        model.fit(expression)
        # The prediction itself is not inspected; the call only has to succeed.
        model.predict(expression)

        print(model.score(expression))
示例#8
0
    def test_all(self):
        """Fit, predict and score MultiNet on a column subset of the test data.

        The 900 columns of ``test_data.rawData`` with the highest
        99th-percentile value are kept; the model is trained with the
        ``mean_squared_error`` loss and the ``AdamOptimizer`` optimizer, and
        its score on the subset is printed.
        """
        subset = test_data.rawData
        # Rank columns by their 99th percentile, largest first.
        ordering = subset.quantile(.99).sort_values(ascending=False)
        subset = subset[ordering.index[0:900]]

        first_dense = {'label': 'dense', 'activation': 'relu', 'nb_neurons': 150}
        dropout = {'label': 'dropout', 'activation': 'dropout', 'rate': 0.2}
        last_dense = {'label': 'dense', 'activation': 'relu'}

        model = MultiNet(
            layers=[first_dense, dropout, last_dense],
            loss='mean_squared_error',
            optimizer='AdamOptimizer',
            dims=[20, 500],
            preproc='log_or_exp',
            seed=1,
            ncores=4,
        )
        model.fit(subset)
        # The prediction itself is not inspected; the call only has to succeed.
        model.predict(subset)

        print(model.score(subset))
示例#9
0
#-------------------------# DeepImpute #-------------------------#

# Values passed as ``cell_subset`` to MultiNet.fit, one training run per value
# and iteration.
cellRatios = [0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 0.9, 1]

for cellRatio in cellRatios:
    for nb in range(n_iter):
        print("Cellratio: {} (iteration {})".format(cellRatio, nb))

        output_name = "{}/deepImpute_{}_{}.npy".format(outputdir, cellRatio,
                                                       nb)

        # Results are cached on disk: training only happens when the output
        # file for this (ratio, iteration) pair does not exist yet.
        if not os.path.exists(output_name):
            model = MultiNet(ncores=ncores, verbose=0)
            model.fit(raw, cell_subset=cellRatio)
            imputed = model.predict(raw, imputed_only=True)
            np.save(output_name, imputed.values)
            # The column labels are stored next to the matrices so the
            # reference data can be restricted to the same columns below.
            np.save('{}/imputed_genes.npy'.format(outputdir), imputed.columns)

# The saved column labels form an object-dtype array when they are strings;
# NumPy >= 1.16.3 refuses to load such arrays unless allow_pickle=True is
# given. The file is produced by this script, so unpickling it is safe.
imputed_genes = np.load('{}/imputed_genes.npy'.format(outputdir),
                        allow_pickle=True)
truth = truth[imputed_genes].values
mask = mask[imputed_genes].values

#-------------------------# MSE and Pearson #-------------------------#

scores = []

for ratio in cellRatios:
    for nb in range(n_iter):
        imputed = np.load("{}/deepImpute_{}_{}.npy".format(
            outputdir, ratio, nb))
示例#10
0
            "rate": 0.3
        }]
    }

    multinet = MultiNet(**NN_params)

    # Wall-clock time and resource usage are sampled immediately before and
    # after the fit; the predict call below is outside the measured window.
    # NOTE(review): memory_usage is presumably memory_profiler.memory_usage,
    # which would run multinet.fit(tmp, cell_subset=1, minVMR=0.5) — confirm.
    start_time, start_resources = timestamp(), resource_usage(RUSAGE_SELF)
    mem_registered = memory_usage((multinet.fit, (tmp, ), {
        'cell_subset': 1,
        'minVMR': 0.5
    }),
                                  retval=False,
                                  max_usage=True,
                                  include_children=True)
    end_resources, end_time = resource_usage(RUSAGE_SELF), timestamp()
    imputedData = multinet.predict(tmp)

    real = end_time - start_time
    systime = end_resources.ru_stime - start_resources.ru_stime
    usertime = end_resources.ru_utime - start_resources.ru_utime
    cpu_time = systime + usertime

    # Predictions are rounded and written transposed, with index and header.
    imputedData = np.round(imputedData)
    imputedData.transpose().to_csv(denoised_name, index=True, header=True)
    # Append one record to the runtime log. The context manager closes the
    # file even if write() raises, which the manual open/close did not.
    with open(f"{outputDir}/{algorithm}_runtime.csv", "a+") as file:
        file.write(
            f"\n{experiment_name},{algorithm},{str(cpu_time)},{str(real)},{str(mem_registered)}"
        )

import magic
# Report the two dimensions of pbmcLNC (rows labelled as cells, columns as
# genes in the message).
print('Working on {} cells and {} genes'.format(*pbmcLNC.shape))

# Notebook-style check: evaluates to True when at least one column label is
# repeated. As a plain script statement the result is discarded.
True in pbmcLNC.columns.duplicated()

def get_duplicate_cols(df: pd.DataFrame) -> pd.Series:
    """Return the occurrence count of every column label that appears more than once.

    The result is indexed by column label; labels that occur exactly once
    are left out, so a frame without duplicates yields an empty Series.
    """
    occurrences = pd.Series(df.columns).value_counts()
    repeated = occurrences > 1
    return occurrences[repeated]

# Notebook-style inspection of repeated column labels (result is discarded
# when run as a script).
get_duplicate_cols(pbmcLNC)

# Remove the listed column labels (axis=1); 'Y_RNA' appears twice in the list.
# presumably these are the labels reported as duplicated above — verify.
pbmcLNC = pbmcLNC.drop(['Y_RNA', 'Y_RNA', 'U1', '7SK'], axis=1)

# Shape after dropping the columns (notebook output only).
pbmcLNC.shape

# `model` is created outside this excerpt; it is trained on the cleaned frame
# and then used to predict on that same frame.
model.fit(pbmcLNC)

imputedData = model.predict(pbmcLNC)

# Peek at the first row of the prediction (notebook output only).
imputedData.head(1)

import matplotlib.pyplot as plt
import numpy as np

# Shared range for both axes; also the endpoints of the reference line below.
limits = [0,100]

fig,ax = plt.subplots()

# One normal(0, 1) sample per element of pbmcLNC.
jitter = np.random.normal(0,1,pbmcLNC.size) # Add some jittering to better see the point density
# x: flattened pbmcLNC values plus jitter; y: flattened imputedData values.
ax.scatter(pbmcLNC.values.flatten()+jitter,imputedData.values.flatten(),s=2)
# Red dash-dot line through (0, 0) and (100, 100), i.e. y = x.
ax.plot(limits,limits,'r-.',linewidth=2)
ax.set_xlim(limits)
ax.set_ylim(limits)
示例#12
0
                    index=handle.get('fish/cells')[:].astype(str),
                    columns=handle.get('fish/genes')[:].astype(str))

# Row and column labels are cast to str and reused as index/columns for every
# frame built from `handle` below.
cells = handle.get('dropseq/cells')[:].astype(str)
genes = handle.get('dropseq/genes')[:].astype(str)
dropseq = pd.DataFrame(handle.get('dropseq/raw')[:],
                       index=cells,
                       columns=genes)

# NOTE(review): this empty dict is replaced by the dict comprehension further
# down before anything is stored in it.
imputed_data = {}

#-------------------------# DeepImpute #-------------------------#

# Train on the dropseq frame and predict on that same frame.
model = MultiNet(ncores=40)
model.fit(dropseq)
imputed = model.predict(dropseq, imputed_only=True)

# Only compare with imputed gene subset
genes_to_extract = np.intersect1d(imputed.columns, fish.columns)

#-------------------------# Load all imputation results #-------------------------#
print('Loading datasets')
# One frame per method in imputation_methods[1:-1] (the first and last entries
# are skipped), each read from 'imputed/<method>' and restricted to
# genes_to_extract.
imputed_data = {
    method: pd.DataFrame(handle.get('imputed/{}'.format(method))[:],
                         index=cells,
                         columns=genes)[genes_to_extract]
    for method in imputation_methods[1:-1]
}
# presumably the skipped first/last methods are 'raw' and 'deepImpute', which
# are filled in from in-memory frames here — verify against imputation_methods.
imputed_data['raw'] = dropseq[genes_to_extract]
imputed_data['deepImpute'] = imputed[genes_to_extract]