def test_other():
    """Run the "other methods" plotting helpers on stored fixtures and compare images.

    Rebuilds a ``GPy.models.BayesianGPLVM`` from the parameters stored in
    ``test_data_model.npz``, wraps it in a ``ManifoldCorrectionKNN`` and calls
    ``plot_comparison``, ``plot_dist_hist``, ``plot_landscape_other`` and
    ``plot_labels_other``. Afterwards it yields every check produced by
    ``_image_comparison`` for the baseline images ``other_comparison``,
    ``other_dist_hist`` and ``other_landscape_labs``.

    Side effects: seeds NumPy's global RNG with 111 and resets matplotlib's
    ``rcParams`` to their defaults (with ``text.usetex`` disabled).

    Raises:
        IOError: if either fixture archive is missing from ``basedir`` (the
            former conversion into a ``SkipTest`` is disabled, so the error
            simply propagates).
    """
    np.random.seed(111)
    import matplotlib
    matplotlib.rcParams.update(matplotlib.rcParamsDefault)
    # matplotlib.rcParams[u'figure.figsize'] = (4,3)
    matplotlib.rcParams[u'text.usetex'] = False
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Presumably the recipe that produced the stored fixtures (kept for reference):
        # from topslam.simulation.simulate_trajectory import qpcr_simulation
        # Xsim, simulate_new, t, c, labels, seed = qpcr_simulation(48, 6, 5001)
        # np.random.seed(3)
        # Y = simulate_new()
        # m = GPy.models.BayesianGPLVM(Y, 2, X=Xsim, num_inducing=25)
        # m.optimize()

        # Read everything needed inside the context managers so the archive
        # file handles are closed again instead of leaking.
        # allow_pickle=True: `dims` is unpickled into a dict (it is indexed by
        # method name below), which NumPy >= 1.16.3 refuses by default. This is
        # only acceptable because these are trusted local fixture files.
        with np.load(os.path.join(basedir, 'test_data_model.npz'),
                     allow_pickle=True) as test_data:
            labels = test_data['labels']
            Y = test_data['Y']
            pt = test_data['t']
            model_params = test_data['model_params']
        with np.load(os.path.join(basedir, 'test_data_others.npz'),
                     allow_pickle=True) as test_init:
            dims = test_init['dims'].tolist()
            X_init = test_init['X_init']

        # Build the model without initializing, inject the stored parameters,
        # then initialize so the model reflects exactly those parameters.
        m = GPy.models.BayesianGPLVM(Y.copy(), 2, num_inducing=25, initialize=False)
        m.param_array[:] = model_params
        m.initialize_parameter()

        from topslam import ManifoldCorrectionKNN
        mc = ManifoldCorrectionKNN(m, 10)

        from topslam.plotting import plot_comparison, plot_dist_hist, plot_labels_other, plot_landscape_other
        plot_comparison(mc, X_init, dims, labels, np.unique(labels), 0)
        plot_dist_hist(Y)

        # Landscape and labels are drawn onto the same axes.
        X = X_init[:, dims['t-SNE']]
        _fig, ax = plt.subplots()
        plot_landscape_other(X, pt, labels, ax=ax)
        plot_labels_other(X, pt, labels, ax=ax)

    baseline_images = [
        'other_{}'.format(sub)
        for sub in ["comparison", "dist_hist", "landscape_labs"]
    ]
    for do_test in _image_comparison(baseline_images=baseline_images,
                                     extensions=extensions):
        yield do_test
def test_other():
    """Run the "other methods" plotting helpers on stored fixtures and compare images.

    Rebuilds a ``GPy.models.BayesianGPLVM`` from the parameters stored in
    ``test_data_model.npz``, wraps it in a ``ManifoldCorrectionKNN`` and calls
    ``plot_comparison``, ``plot_dist_hist``, ``plot_landscape_other`` and
    ``plot_labels_other``. Afterwards it yields every check produced by
    ``_image_comparison`` for the baseline images ``other_comparison``,
    ``other_dist_hist`` and ``other_landscape_labs``.

    Side effects: seeds NumPy's global RNG with 111 and resets matplotlib's
    ``rcParams`` to their defaults (with ``text.usetex`` disabled).

    Raises:
        IOError: if either fixture archive is missing from ``basedir`` (the
            former conversion into a ``SkipTest`` is disabled, so the error
            simply propagates).
    """
    np.random.seed(111)
    import matplotlib

    matplotlib.rcParams.update(matplotlib.rcParamsDefault)
    # matplotlib.rcParams[u'figure.figsize'] = (4,3)
    matplotlib.rcParams[u"text.usetex"] = False
    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Presumably the recipe that produced the stored fixtures (kept for reference):
        # from topslam.simulation.simulate_trajectory import qpcr_simulation
        # Xsim, simulate_new, t, c, labels, seed = qpcr_simulation(48, 6, 5001)
        # np.random.seed(3)
        # Y = simulate_new()
        # m = GPy.models.BayesianGPLVM(Y, 2, X=Xsim, num_inducing=25)
        # m.optimize()

        # Read everything needed inside the context managers so the archive
        # file handles are closed again instead of leaking.
        # allow_pickle=True: `dims` is unpickled into a dict (it is indexed by
        # method name below), which NumPy >= 1.16.3 refuses by default. This is
        # only acceptable because these are trusted local fixture files.
        with np.load(os.path.join(basedir, "test_data_model.npz"), allow_pickle=True) as test_data:
            labels = test_data["labels"]
            Y = test_data["Y"]
            pt = test_data["t"]
            model_params = test_data["model_params"]
        with np.load(os.path.join(basedir, "test_data_others.npz"), allow_pickle=True) as test_init:
            dims = test_init["dims"].tolist()
            X_init = test_init["X_init"]

        # Build the model without initializing, inject the stored parameters,
        # then initialize so the model reflects exactly those parameters.
        m = GPy.models.BayesianGPLVM(Y.copy(), 2, num_inducing=25, initialize=False)
        m.param_array[:] = model_params
        m.initialize_parameter()

        from topslam import ManifoldCorrectionKNN

        mc = ManifoldCorrectionKNN(m, 10)

        from topslam.plotting import plot_comparison, plot_dist_hist, plot_labels_other, plot_landscape_other

        plot_comparison(mc, X_init, dims, labels, np.unique(labels), 0)
        plot_dist_hist(Y)

        # The value returned by plot_landscape_other is reused as the axes
        # for the label overlay.
        X = X_init[:, dims["t-SNE"]]
        ax = plot_landscape_other(X, pt, labels)
        plot_labels_other(X, pt, labels, ax=ax)

    baseline_images = [
        "other_{}".format(sub)
        for sub in ["comparison", "dist_hist", "landscape_labs"]
    ]
    for do_test in _image_comparison(
        baseline_images=baseline_images,
        extensions=extensions,
    ):
        yield do_test
def example_deng(optimize=True, plot=True):
    """Build a topslam model on the Deng single-cell RNA-seq dataset.

    The filtered expression values are cached as CSV in ``~/tmp/Deng``; when
    the cache exists it is read back, otherwise the data is fetched through
    ``GPy.util.datasets.singlecell_rna_seq_deng``, filtered with
    ``filter_RNASeq`` and written to the cache. The values are standardized
    with the global mean and standard deviation before ``run_methods`` and
    ``create_model`` are called.

    Args:
        optimize: if true, run ``optimize_model`` on the created model.
        plot: if true, build a ``ManifoldCorrectionTree`` for the model and
            call ``plot_comparison``.

    Returns:
        The model returned by ``create_model``, with the extra attributes
        ``Ymean``, ``Ystd``, ``data_labels``, ``data_ulabels``, ``data``,
        ``X_init`` and ``dims`` attached.

    Side effects: seeds NumPy's global RNG with 42, prints progress messages,
    and may create ``~/tmp/Deng/filtered_expression_values.csv``.
    """
    import os
    import pandas as pd
    import GPy
    import numpy as np
    from topslam.filtering import filter_RNASeq

    # Reproducibility: BGPLVM has local optima.
    np.random.seed(42)

    # This is the process of how we loaded the data.
    # Stage labels used to select (and order) the rows of the raw data.
    ulabels = [
        'Zygote',
        '2-cell embryo',
        'Early 2-cell blastomere',
        'Mid 2-cell blastomere',
        'Late 2-cell blastomere',
        '4-cell blastomere',
        '8-cell blastomere',
        '16-cell blastomere',
        'Early blastocyst cell',
        'Mid blastocyst cell',
        'Late blastocyst cell',
        'fibroblast',
        'adult liver',
    ]

    folder_path = os.path.expanduser('~/tmp/Deng')
    csv_file = os.path.join(folder_path, 'filtered_expression_values.csv')

    if os.path.exists(csv_file):
        print('Loading previous filtered data: {}'.format(csv_file))
        Y_bgplvm = pd.read_csv(csv_file, index_col=[0, 1, 2], header=0)
    else:
        print('Loading data:')
        data = GPy.util.datasets.singlecell_rna_seq_deng()
        if not os.path.exists(folder_path):
            # makedirs rather than mkdir: the parent ~/tmp may not exist yet.
            os.makedirs(folder_path)
        Ydata = data['Y'].copy()
        Ydata.columns = Ydata.columns.to_series().apply(str.upper)
        # Build a three-level row index and put the stage label first so that
        # rows can be selected by stage below.
        Ydata = Ydata.reset_index().set_index('index', append=True)
        Ydata['labels'] = data['labels'].values
        Ydata = Ydata.set_index('labels', append=True)
        Ydata = Ydata.reorder_levels([0, 2, 1])
        Ydata = Ydata.reset_index([0, 2]).loc[ulabels].set_index(['level_0', 'index'], append=True)
        Y = Ydata.copy()
        # Keep only the part of each column name before the first '.'.
        Y.columns = [c.split('.')[0] for c in Y.columns]
        Y_bgplvm = filter_RNASeq(Y)
        print('\nSaving data to tmp file: {}'.format(csv_file))
        Y_bgplvm.to_csv(csv_file)

    # Explicit copy so the in-place relabelling below can never write through
    # to the DataFrame's index.
    labels = Y_bgplvm.index.get_level_values(0).values.copy()

    Ymean = Y_bgplvm.values.mean()
    Ystd = Y_bgplvm.values.std()
    # NOTE(review): the standardization is done in place on `.values`; whether
    # that also modifies Y_bgplvm (stored as m.data below) depends on whether
    # pandas hands out a view here - confirm the intended behavior.
    Y_m = Y_bgplvm.values
    Y_m -= Ymean
    Y_m /= Ystd

    def _split_aware_labels(stage, prefix):
        """Return one label per sample of `stage`: `prefix`, or `prefix + ' split'`
        when the part of the sample name before the first '-' contains 'split'."""
        sample_names = Y_bgplvm.loc[stage].index.get_level_values(1)
        return [
            prefix + ' split' if 'split' in name.split('-')[0] else prefix
            for name in sample_names
        ]

    # Get the labels right for the split experiments (8- and 16-cell stages).
    labels[labels == '8-cell blastomere'] = _split_aware_labels('8-cell blastomere', '8')
    labels[labels == '16-cell blastomere'] = _split_aware_labels('16-cell blastomere', '16')

    # Unique labels in order of first appearance.
    ulabels = []
    for lab in labels:
        if lab not in ulabels:
            ulabels.append(lab)

    short_labels = labels.copy()
    # Short display names, matched by position to `ulabels` in the zip below.
    # NOTE(review): this assumes the data lists the stages in the order of
    # this table - confirm against the dataset.
    _ulabels_convert = np.array([
        'Z',   # Z
        'E',   # Em
        '2',   # Bm E
        '2',   # Bm M
        '2',   # Bm L
        '4',
        '8',
        '8 s',
        '16',
        '16 s',
        'Bz',  # E
        'Bz',  # M
        'Bz',  # L
        'F',
        'L',
    ])

    short_ulabels = []
    for lab, nlab in zip(ulabels, _ulabels_convert):
        short_labels[short_labels == lab] = nlab
        if nlab not in short_ulabels:
            short_ulabels.append(nlab)

    from topslam.optimization import run_methods, methods, create_model, optimize_model
    X_init, dims = run_methods(Y_m, methods)

    m = create_model(Y_m, X_init, num_inducing=25)
    # Attach the preprocessing results to the model so callers can reach them.
    m.Ymean = Ymean
    m.Ystd = Ystd
    m.data_labels = short_labels
    m.data_ulabels = short_ulabels
    m.data = Y_bgplvm
    m.X_init = X_init
    m.dims = dims

    if optimize:
        optimize_model(m)

    if plot:
        # NOTE(review): ManifoldCorrectionTree and plot_comparison are not
        # imported inside this function; they must be available at module level.
        mc = ManifoldCorrectionTree(m)
        plot_comparison(mc, X_init, dims, m.data_labels, m.data_ulabels, 0)
    return m
def example_deng(optimize=True, plot=True):
    """Build a topslam model on the Deng single-cell RNA-seq dataset.

    The filtered expression values are cached as CSV in ``~/tmp/Deng``; when
    the cache exists it is read back, otherwise the data is fetched through
    ``GPy.util.datasets.singlecell_rna_seq_deng``, filtered with
    ``filter_RNASeq`` and written to the cache. The values are standardized
    with the global mean and standard deviation before ``run_methods`` and
    ``create_model`` are called.

    Args:
        optimize: if true, run ``optimize_model`` on the created model.
        plot: if true, build a ``ManifoldCorrectionTree`` for the model and
            call ``plot_comparison``.

    Returns:
        The model returned by ``create_model``, with the extra attributes
        ``Ymean``, ``Ystd``, ``data_labels``, ``data_ulabels``, ``data``,
        ``X_init`` and ``dims`` attached.

    Side effects: seeds NumPy's global RNG with 42, prints progress messages,
    and may create ``~/tmp/Deng/filtered_expression_values.csv``.
    """
    import os
    import pandas as pd
    import GPy
    import numpy as np
    from topslam.filtering import filter_RNASeq

    # Reproducibility: BGPLVM has local optima.
    np.random.seed(42)

    # This is the process of how we loaded the data.
    # Stage labels used to select (and order) the rows of the raw data.
    ulabels = [
        'Zygote',
        '2-cell embryo',
        'Early 2-cell blastomere',
        'Mid 2-cell blastomere',
        'Late 2-cell blastomere',
        '4-cell blastomere',
        '8-cell blastomere',
        '16-cell blastomere',
        'Early blastocyst cell',
        'Mid blastocyst cell',
        'Late blastocyst cell',
        'fibroblast',
        'adult liver',
    ]

    folder_path = os.path.expanduser('~/tmp/Deng')
    csv_file = os.path.join(folder_path, 'filtered_expression_values.csv')

    if os.path.exists(csv_file):
        print('Loading previous filtered data: {}'.format(csv_file))
        Y_bgplvm = pd.read_csv(csv_file, index_col=[0, 1, 2], header=0)
    else:
        print('Loading data:')
        data = GPy.util.datasets.singlecell_rna_seq_deng()
        if not os.path.exists(folder_path):
            # makedirs rather than mkdir: the parent ~/tmp may not exist yet.
            os.makedirs(folder_path)
        Ydata = data['Y'].copy()
        Ydata.columns = Ydata.columns.to_series().apply(str.upper)
        # Build a three-level row index and put the stage label first so that
        # rows can be selected by stage below.
        Ydata = Ydata.reset_index().set_index('index', append=True)
        Ydata['labels'] = data['labels'].values
        Ydata = Ydata.set_index('labels', append=True)
        Ydata = Ydata.reorder_levels([0, 2, 1])
        Ydata = Ydata.reset_index([0, 2]).loc[ulabels].set_index(
            ['level_0', 'index'], append=True)
        Y = Ydata.copy()
        # Keep only the part of each column name before the first '.'.
        Y.columns = [c.split('.')[0] for c in Y.columns]
        Y_bgplvm = filter_RNASeq(Y)
        print('\nSaving data to tmp file: {}'.format(csv_file))
        Y_bgplvm.to_csv(csv_file)

    # Explicit copy so the in-place relabelling below can never write through
    # to the DataFrame's index.
    labels = Y_bgplvm.index.get_level_values(0).values.copy()

    Ymean = Y_bgplvm.values.mean()
    Ystd = Y_bgplvm.values.std()
    # NOTE(review): the standardization is done in place on `.values`; whether
    # that also modifies Y_bgplvm (stored as m.data below) depends on whether
    # pandas hands out a view here - confirm the intended behavior.
    Y_m = Y_bgplvm.values
    Y_m -= Ymean
    Y_m /= Ystd

    def _split_aware_labels(stage, prefix):
        """Return one label per sample of `stage`: `prefix`, or `prefix + ' split'`
        when the part of the sample name before the first '-' contains 'split'."""
        sample_names = Y_bgplvm.loc[stage].index.get_level_values(1)
        return [
            prefix + ' split' if 'split' in name.split('-')[0] else prefix
            for name in sample_names
        ]

    # Get the labels right for the split experiments (8- and 16-cell stages).
    labels[labels == '8-cell blastomere'] = _split_aware_labels('8-cell blastomere', '8')
    labels[labels == '16-cell blastomere'] = _split_aware_labels('16-cell blastomere', '16')

    # Unique labels in order of first appearance.
    ulabels = []
    for lab in labels:
        if lab not in ulabels:
            ulabels.append(lab)

    short_labels = labels.copy()
    # Short display names, matched by position to `ulabels` in the zip below.
    # NOTE(review): this assumes the data lists the stages in the order of
    # this table - confirm against the dataset.
    _ulabels_convert = np.array([
        'Z',   # Z
        'E',   # Em
        '2',   # Bm E
        '2',   # Bm M
        '2',   # Bm L
        '4',
        '8',
        '8 s',
        '16',
        '16 s',
        'Bz',  # E
        'Bz',  # M
        'Bz',  # L
        'F',
        'L',
    ])

    short_ulabels = []
    for lab, nlab in zip(ulabels, _ulabels_convert):
        short_labels[short_labels == lab] = nlab
        if nlab not in short_ulabels:
            short_ulabels.append(nlab)

    from topslam.optimization import run_methods, methods, create_model, optimize_model
    X_init, dims = run_methods(Y_m, methods)

    m = create_model(Y_m, X_init, num_inducing=25)
    # Attach the preprocessing results to the model so callers can reach them.
    m.Ymean = Ymean
    m.Ystd = Ystd
    m.data_labels = short_labels
    m.data_ulabels = short_ulabels
    m.data = Y_bgplvm
    m.X_init = X_init
    m.dims = dims

    if optimize:
        optimize_model(m)

    if plot:
        # NOTE(review): ManifoldCorrectionTree and plot_comparison are not
        # imported inside this function; they must be available at module level.
        mc = ManifoldCorrectionTree(m)
        plot_comparison(mc, X_init, dims, m.data_labels, m.data_ulabels, 0)
    return m