del commongenes # discard bad columns tobediscarded = np.logical_or.reduce( ((gene_atb.matrix != 0).sum(axis=0) < 3, (gene_atb.matrix == 0).sum(axis=0) < 3, np.isnan(gene_atb.matrix).any(axis=0))) gene_atb.discard(tobediscarded, axis=1) tobediscarded = np.logical_or((gene_cst.matrix != 0).sum(axis=0) < 3, np.isnan(gene_cst.matrix).any(axis=0)) gene_cst.discard(tobediscarded, axis=1) if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0 or gene_cst.shape[ 0] == 0 or gene_cst.shape[1] == 0: continue # arbitrary prioritization to break redundancyindex ties later gene_atb.columnmeta[ 'arbitrary_pvalues'] = featureselection.univariate_chisquare( X=gene_atb.matrix, Y=gene_cst.matrix[:, 0] < 0.2)[1] tobediscarded = np.isnan(gene_atb.columnmeta['arbitrary_pvalues']) gene_atb.discard(tobediscarded, axis=1) if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0: continue # discard redundant features rowstatpreferredorder = np.array(['mean', 'stdv'], dtype='object') atb_atb = atb_atb.tolabels(gene_atb.columnlabels, gene_atb.columnlabels) atb_atb.rowmeta = copy.deepcopy(gene_atb.columnmeta) atb_atb.columnmeta = copy.deepcopy(gene_atb.columnmeta) redundancyindex = (np.abs(atb_atb.matrix) > similarity_threshold).sum(1).astype('float64') for i, rowstat in enumerate(rowstatpreferredorder): if rowstat in atb_atb.rowlabels: redundancyindex[atb_atb.rowlabels == rowstat] += 1 / (2 + i) table = list(
tobediscarded = np.logical_or((gene_atb.matrix != 0).sum(axis=0) < 3, np.isnan(gene_atb.matrix).any(axis=0)) gene_atb.discard(tobediscarded, axis=1) tobediscarded = np.logical_or((gene_cst.matrix != 0).sum(axis=0) < 3, np.isnan(gene_cst.matrix).any(axis=0)) gene_cst.discard(tobediscarded, axis=1) if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0 or gene_cst.shape[ 0] == 0 or gene_cst.shape[1] == 0: continue # arbitrary prioritization to break redundancyindex ties later gene_atb.columnmeta['arbitrary_pvalues'] = np.zeros(gene_atb.shape[1], dtype='float64') gene_atb.columnmeta['arbitrary_pvalues'][ ~gene_atb. columnmeta['isrowstat']] = featureselection.univariate_chisquare( X=gene_atb.matrix[:, ~gene_atb.columnmeta['isrowstat']], Y=gene_cst.select([], '0'))[1] gene_atb.columnmeta['arbitrary_pvalues'][ gene_atb.columnmeta['isrowstat']] = featureselection.univariate_utest( X=gene_atb.matrix[:, gene_atb.columnmeta['isrowstat']], Y=gene_cst.select([], '0'))[1] tobediscarded = np.isnan(gene_atb.columnmeta['arbitrary_pvalues']) gene_atb.discard(tobediscarded, axis=1) if gene_atb.shape[0] == 0 or gene_atb.shape[1] == 0: continue # discard redundant features rowstatpreferredorder = np.array(['mean', 'stdv'], dtype='object') atb_atb = atb_atb.tolabels(gene_atb.columnlabels, gene_atb.columnlabels) atb_atb.rowmeta = copy.deepcopy(gene_atb.columnmeta) atb_atb.columnmeta = copy.deepcopy(gene_atb.columnmeta) redundancyindex = (np.abs(atb_atb.matrix) >