Пример #1
0
 def compute_diffexp_ttest(self, maskA, maskB, top_n=None, lfc_cutoff=None):
     """Delegate a t-test differential expression run to ``diffexp_generic``.

     ``top_n`` and ``lfc_cutoff`` fall back to ``diffexp__top_n`` and
     ``diffexp__lfc_cutoff`` on ``self.dataset_config`` when passed as
     ``None``. The value returned by ``diffexp_generic.diffexp_ttest`` is
     handed back unchanged.
     """
     effective_top_n = (
         self.dataset_config.diffexp__top_n if top_n is None else top_n
     )
     effective_cutoff = (
         self.dataset_config.diffexp__lfc_cutoff
         if lfc_cutoff is None
         else lfc_cutoff
     )
     return diffexp_generic.diffexp_ttest(
         self, maskA, maskB, effective_top_n, effective_cutoff
     )
Пример #2
0
 def test_cxg_generic(self):
     """Exercise the generic diffexp implementation on a cxg adaptor."""
     cxg_adaptor = self.load_dataset(f"{FIXTURES_ROOT}/pbmc3k.cxg")
     # Masks are built in the same order as before: (1, 10) then (2, 10).
     group_a, group_b = (
         self.get_mask(cxg_adaptor, k, 10) for k in (1, 2)
     )
     # Call the generic implementation directly rather than via the adaptor.
     self.check_1_10_2_10(
         diffexp_generic.diffexp_ttest(cxg_adaptor, group_a, group_b, 10)
     )
Пример #3
0
def DEG(data):
  """Run a two-group differential expression test and render a volcano plot.

  ``data`` is the request dictionary. Keys read directly here: ``genes``,
  ``DEmethod``, ``grp``, ``comGrp``, ``figOpt`` (``img``, ``fontsize``,
  ``dpi``), ``logFC``, ``topN`` and, depending on the branch, ``combine``
  (presence only), ``cellN``, ``cells``, ``url_dataroot`` and ``dataset``.
  The dict is also handed to ``getObs``, ``createData`` and ``getVar``.

  Side effects on ``data``: ``data['genes']`` is reset to ``[]``, and
  ``data['figOpt']['scale']`` is set to ``'false'`` on every path except
  ``combine`` + ``'default'`` (the per-group copies are shallow, so they
  share ``figOpt`` with the caller's dict).

  Returns:
    A JSON string encoding ``[csv_text, img]`` where ``csv_text`` is the
    result table (joined with ``getVar(data)``, optionally truncated to
    ``topN`` rows) and ``img`` is the decoded stdout of ``volcano.R``.

  Raises:
    ValueError: for the ``'default'`` method when either group has fewer
      than 10 selected cells; for other methods when no AnnData object
      could be built from the selection.
  """
  import tempfile  # function-scope import: only the scratch CSV needs it

  adata = None
  genes = data['genes']
  # NOTE: mutates the caller's dict; the requested genes stay in `genes`
  # and are only forwarded to the plotting script.
  data['genes'] = []
  comGrp = 'cellGrp'
  if 'combine' in data.keys():
    if data['DEmethod']=='default':
      combUpdate, obs = getObs(data)
      if combUpdate and len(data['grp'])>1:
        # Build a combined "<grp0>:<grp1>:..." label column.
        obs[comGrp] = obs[data['grp'][0]]
        for i in data['grp']:
          if i!=data['grp'][0]:
            obs[comGrp] += ":"+obs[i]
      mask = [obs[comGrp].isin([data['comGrp'][i]]) for i in [0,1]]
    else:
      data['figOpt']['scale'] = 'false'
      adata = createData(data)
      comGrp = data['grp'][0]
      adata = adata[adata.obs[comGrp].isin(data['comGrp'])]
  else:
    mask = [pd.Series(range(data['cellN'])).isin(data['cells'][one].values()) for one in data['comGrp']]
    for one in data['comGrp']:
      # Shallow copy: top-level keys are overridden per group, but nested
      # objects such as 'figOpt' are still shared with `data`.
      oneD = data.copy()
      oneD['cells'] = data['cells'][one]
      oneD['genes'] = []
      oneD['grp'] = []
      oneD['figOpt']['scale']='false'

      D = createData(oneD)
      D.obs[comGrp] = one
      if adata is None:
        adata = D
      else:
        adata = adata.concatenate(D)

  if data['DEmethod']=='default':
    # Series.sum() counts True values without a Python-level loop.
    if mask[0].sum()<10 or mask[1].sum()<10:
      raise ValueError('Less than 10 cells in a group!')
    with app.get_data_adaptor(url_dataroot=data['url_dataroot'],dataset=data['dataset']) as scD:
      res = diffDefault.diffexp_ttest(scD,mask[0].to_numpy(),mask[1].to_numpy(),scD.data.shape[0])
      gNames = list(scD.data.var["name_0"])
    deg = pd.DataFrame(res,columns=['gID','log2fc','pval','qval'])
    # Translate the returned gene indices into gene names.
    gName = pd.Series([gNames[i] for i in deg['gID']],name='gene')
    deg = pd.concat([deg,gName],axis=1).loc[:,['gene','log2fc','pval','qval']]
  else:
    if not 'AnnData' in str(type(adata)):
      raise ValueError('No data extracted by user selection')
    # NOTE(review): astype() returns a new frame and the result is discarded,
    # so this line has no effect. Left untouched because assigning it back
    # would change what the test below sees -- confirm the intent.
    adata.obs.astype('category')
    nm = None
    if data['DEmethod']=='wald':
      nm = 'nb'
    res = de.test.two_sample(adata,comGrp,test=data['DEmethod'],noise_model=nm)
    deg = res.summary()
    deg = deg.sort_values(by=['qval']).loc[:,['gene','log2fc','pval','qval']]
    # Sign flip; presumably aligns the direction with the 'default' path.
    deg['log2fc'] = -1 * deg['log2fc']

  ## plot in R
  # mkstemp gives a unique, private file (no name clashes between concurrent
  # requests); the finally clause removes it even if the export or the
  # R script invocation raises.
  fd, strF = tempfile.mkstemp(prefix='DEG',suffix='.csv')
  os.close(fd)
  try:
    deg.to_csv(strF,index=False)
    res = subprocess.run([strExePath+'/volcano.R',strF,';'.join(genes),data['figOpt']['img'],str(data['figOpt']['fontsize']),str(data['figOpt']['dpi']),str(data['logFC']),data['comGrp'][1],data['comGrp'][0]],capture_output=True)
  finally:
    os.remove(strF)
  img = res.stdout.decode('utf-8')
  #####
  gInfo = getVar(data)
  deg.index = deg['gene']
  deg = pd.concat([deg,gInfo],axis=1,sort=False)

  if not data['topN']=='All':
    # A slice tolerates topN larger than the table (the former positional
    # range raised IndexError); clamp at 0 so negative values still give an
    # empty table, as range() did.
    deg = deg.iloc[:max(int(data['topN']),0)]

  return json.dumps([deg.to_csv(),img])
Пример #4
0
    def sparse_diffexp(self, apply_col_shift):
        """Check that diffexp agrees across anndata, sparse cxg and dense cxg.

        A test h5ad file is generated in a temporary directory and written
        out twice as cxg (``sparse_threshold=11`` and ``sparse_threshold=0``).
        The generic implementation on the anndata adaptor is then compared
        with the cxg implementation on both containers, and the X columns
        named by the anndata results are compared value by value.

        ``apply_col_shift`` is forwarded to ``create_test_h5ad`` and must
        match whether the sparse container has an ``X_col_shift`` array.
        """
        with tempfile.TemporaryDirectory() as dirname:
            # create a sparse matrix
            h5adfile = os.path.join(dirname, "sparse.h5ad")
            create_test_h5ad(h5adfile, 2000, 2000, 10, apply_col_shift)
            adaptor_anndata = self.load_dataset(
                h5adfile, extra_dataset_config=dict(embeddings__names=[]))
            adata = adaptor_anndata.data

            sparsename = os.path.join(dirname, "sparse.cxg")
            write_cxg(adata=adata,
                      container=sparsename,
                      title="sparse",
                      sparse_threshold=11)
            adaptor_sparse = self.load_dataset(sparsename)
            assert adaptor_sparse.open_array("X").schema.sparse
            assert adaptor_sparse.has_array("X_col_shift") == apply_col_shift

            densename = os.path.join(dirname, "dense.cxg")
            write_cxg(adata=adata,
                      container=densename,
                      title="dense",
                      sparse_threshold=0)
            adaptor_dense = self.load_dataset(densename)
            assert not adaptor_dense.open_array("X").schema.sparse
            assert not adaptor_dense.has_array("X_col_shift")

            maskA = self.get_mask(adaptor_anndata, 1, 10)
            maskB = self.get_mask(adaptor_anndata, 2, 10)

            diffexp_results_anndata = diffexp_generic.diffexp_ttest(
                adaptor_anndata, maskA, maskB, 10)
            diffexp_results_sparse = diffexp_cxg.diffexp_ttest(
                adaptor_sparse, maskA, maskB, 10)
            diffexp_results_dense = diffexp_cxg.diffexp_ttest(
                adaptor_dense, maskA, maskB, 10)

            self.compare_diffexp_results(diffexp_results_anndata,
                                         diffexp_results_sparse)
            self.compare_diffexp_results(diffexp_results_anndata,
                                         diffexp_results_dense)

            # First element of every result row is used as the column index.
            topcols = np.array([x[0] for x in diffexp_results_anndata])
            cols_anndata = self.get_X_col(adaptor_anndata, topcols)
            cols_sparse = self.get_X_col(adaptor_sparse, topcols)
            cols_dense = self.get_X_col(adaptor_dense, topcols)
            assert cols_anndata.shape[0] == adaptor_sparse.get_shape()[0]
            assert cols_anndata.shape[1] == len(diffexp_results_anndata)

            def convert(mat, cols):
                # Round-trip through the fbs encoding to get a numpy array.
                return decode_matrix_fbs(encode_matrix_fbs(
                    mat, col_idx=cols)).to_numpy()

            cols_anndata = convert(cols_anndata, topcols)
            cols_sparse = convert(cols_sparse, topcols)
            cols_dense = convert(cols_dense, topcols)

            x = adaptor_sparse.get_X_array()
            assert x.shape == adaptor_sparse.get_shape()

            # The former per-element loop only walked the anndata shape, so
            # a larger sparse/dense matrix went unnoticed; check it outright.
            self.assertEqual(cols_sparse.shape, cols_anndata.shape)
            self.assertEqual(cols_dense.shape, cols_anndata.shape)

            # One vectorised comparison per matrix instead of rows * cols
            # Python-level np.isclose calls; same rtol/atol as before.
            self.assertTrue(
                np.allclose(cols_anndata, cols_sparse, 1e-6, 1e-6))
            self.assertTrue(
                np.allclose(cols_anndata, cols_dense, 1e-6, 1e-6))
Пример #5
0
def main():
    """Command-line driver that times diffexp on a dataset.

    Each group (A and B) is chosen either as ``--numX`` randomly sampled
    rows or as the rows returned by ``get_filter_from_obs`` for a
    ``--varX name:value`` pair. The selected algorithm is run ``--trials``
    times and the elapsed time of every trial is printed; with ``--show``
    the results of the last trial are printed as well.

    Exits with status 1 when a group has neither a count nor a variable, or
    when ``--alg cxg`` is requested for an adaptor that is not a
    ``CxgAdaptor``.
    """
    parser = argparse.ArgumentParser("A command to test diffexp")
    parser.add_argument("dataset", help="name of a dataset to load")
    parser.add_argument("-na",
                        "--numA",
                        type=int,
                        help="number of rows in group A")
    parser.add_argument("-nb",
                        "--numB",
                        type=int,
                        help="number of rows in group B")
    parser.add_argument("-va",
                        "--varA",
                        help="obs variable:value to use for group A")
    parser.add_argument("-vb",
                        "--varB",
                        help="obs variable:value to use for group B")
    parser.add_argument("-t",
                        "--trials",
                        default=1,
                        type=int,
                        help="number of trials")
    parser.add_argument("-a",
                        "--alg",
                        choices=("default", "generic", "cxg"),
                        default="default",
                        help="algorithm to use")
    parser.add_argument("-s",
                        "--show",
                        default=False,
                        action="store_true",
                        help="show the results")
    parser.add_argument("-n",
                        "--new-selection",
                        default=False,
                        action="store_true",
                        help="change the selection between each trial")
    parser.add_argument("--seed",
                        default=1,
                        type=int,
                        help="set the random seed")

    args = parser.parse_args()

    app_config = AppConfig()
    app_config.update_server_config(single_dataset__datapath=args.dataset)
    app_config.update_server_config(app__verbose=True)
    app_config.complete_config()

    loader = MatrixDataLoader(args.dataset)
    adaptor = loader.open(app_config)

    if args.show and isinstance(adaptor, CxgAdaptor):
        adaptor.open_array("X").schema.dump()

    random.seed(args.seed)
    np.random.seed(args.seed)
    rows = adaptor.get_shape()[0]

    def select_rows(num, var, label):
        """Return the row filter for one group, or exit if unspecified."""
        if num:
            return random.sample(range(rows), num)
        if var:
            # Split on the first colon only so values may contain ':'.
            vname, vval = var.split(":", 1)
            return get_filter_from_obs(adaptor, vname, vval)
        print(f"must supply num{label} or var{label}")
        sys.exit(1)

    # A is resolved before B so the random draws keep their original order.
    filterA = select_rows(args.numA, args.varA, "A")
    filterB = select_rows(args.numB, args.varB, "B")

    # Fail fast, outside the timed region, instead of on every trial.
    if args.alg == "cxg" and not isinstance(adaptor, CxgAdaptor):
        print("cxg only works with CxgAdaptor")
        sys.exit(1)

    # Stays None when --trials is 0, so --show has nothing to print rather
    # than hitting an unbound name.
    results = None

    for _ in range(args.trials):
        if args.new_selection:
            # Only the randomly sampled groups are redrawn between trials.
            if args.numA:
                filterA = random.sample(range(rows), args.numA)
            if args.numB:
                filterB = random.sample(range(rows), args.numB)

        maskA = np.zeros(rows, dtype=bool)
        maskA[filterA] = True
        maskB = np.zeros(rows, dtype=bool)
        maskB[filterB] = True

        # perf_counter is monotonic and intended for interval timing.
        t1 = time.perf_counter()
        if args.alg == "default":
            results = adaptor.compute_diffexp_ttest(maskA, maskB)
        elif args.alg == "generic":
            results = diffexp_generic.diffexp_ttest(adaptor, maskA, maskB)
        elif args.alg == "cxg":
            results = diffexp_cxg.diffexp_ttest(adaptor, maskA, maskB)

        t2 = time.perf_counter()
        print("TIME=", t2 - t1)

    if args.show and results is not None:
        for res in results:
            print(res)
Пример #6
0
def main():
    """Command-line driver that times diffexp on two random row groups.

    ``--numA`` + ``--numB`` distinct rows are sampled and split into groups
    A and B; with ``--new-selection`` the sample is redrawn before every
    trial. The selected algorithm is run ``--trials`` times and the elapsed
    time of every trial is printed; with ``--show`` the results of the last
    trial are printed as well.

    Exits with status 1 when the requested group sizes are negative or
    exceed the number of rows, or when ``--alg cxg`` is requested for an
    adaptor that is not a ``CxgAdaptor``.
    """
    parser = argparse.ArgumentParser("A command to test diffexp")
    parser.add_argument("dataset", help="name of a dataset to load")
    parser.add_argument("-na",
                        "--numA",
                        type=int,
                        required=True,
                        help="number of rows in group A")
    parser.add_argument("-nb",
                        "--numB",
                        type=int,
                        required=True,
                        help="number of rows in group B")
    parser.add_argument("-t",
                        "--trials",
                        default=1,
                        type=int,
                        help="number of trials")
    parser.add_argument("-a",
                        "--alg",
                        choices=("default", "generic", "cxg"),
                        default="default",
                        help="algorithm to use")
    parser.add_argument("-s",
                        "--show",
                        default=False,
                        action="store_true",
                        help="show the results")
    parser.add_argument("-n",
                        "--new-selection",
                        default=False,
                        action="store_true",
                        help="change the selection between each trial")
    parser.add_argument("--seed",
                        default=1,
                        type=int,
                        help="set the random seed")

    args = parser.parse_args()

    app_config = AppConfig()
    app_config.single_dataset__datapath = args.dataset
    app_config.server__verbose = True
    app_config.complete_config()

    loader = MatrixDataLoader(args.dataset)
    adaptor = loader.open(app_config)

    if args.show and isinstance(adaptor, CxgAdaptor):
        adaptor.open_array("X").schema.dump()

    numA = args.numA
    numB = args.numB
    rows = adaptor.get_shape()[0]

    # random.sample would otherwise fail with a bare ValueError traceback.
    if numA < 0 or numB < 0 or numA + numB > rows:
        print(f"numA and numB must be non-negative and sum to at most {rows}")
        sys.exit(1)

    # Fail fast, outside the timed region, instead of on every trial.
    if args.alg == "cxg" and not isinstance(adaptor, CxgAdaptor):
        print("cxg only works with CxgAdaptor")
        sys.exit(1)

    random.seed(args.seed)

    def draw_groups():
        """Sample numA + numB distinct rows and split them into A and B."""
        samples = random.sample(range(rows), numA + numB)
        return samples[:numA], samples[numA:]

    if not args.new_selection:
        filterA, filterB = draw_groups()

    # Stays None when --trials is 0, so --show has nothing to print rather
    # than hitting an unbound name.
    results = None

    for _ in range(args.trials):
        if args.new_selection:
            filterA, filterB = draw_groups()

        maskA = np.zeros(rows, dtype=bool)
        maskA[filterA] = True
        maskB = np.zeros(rows, dtype=bool)
        maskB[filterB] = True

        # perf_counter is monotonic and intended for interval timing.
        t1 = time.perf_counter()
        if args.alg == "default":
            results = adaptor.compute_diffexp_ttest(maskA, maskB)
        elif args.alg == "generic":
            results = diffexp_generic.diffexp_ttest(adaptor, maskA, maskB)
        elif args.alg == "cxg":
            results = diffexp_cxg.diffexp_ttest(adaptor, maskA, maskB)

        t2 = time.perf_counter()
        print("TIME=", t2 - t1)

    if args.show and results is not None:
        for res in results:
            print(res)