def load_datasets(dataset_name, save_path='data/', url=None):
    """Instantiate the gene dataset selected by ``dataset_name``.

    Built-in names are matched exactly: ``'synthetic'``, ``'cortex'``,
    ``'brain_large'``, ``'retina'``, ``'cbmc'``, ``'brain_small'``,
    ``'hemato'`` and ``'pbmc'``. Any other name is treated as a file name:
    one ending in ``.loom`` builds a ``LoomDataset``, one ending in ``.h5ad``
    builds an ``AnnDataset``, and one containing ``.csv`` anywhere builds a
    ``CsvDataset``.

    Args:
        dataset_name: Built-in dataset name or data file name.
        save_path: Directory forwarded to the dataset constructors that
            accept one. ``'synthetic'`` and ``'cortex'`` take no path, and
            ``'hemato'`` always uses ``'data/HEMATO/'``.
        url: Forwarded only to ``LoomDataset`` and ``AnnDataset``.

    Returns:
        The constructed dataset object.

    Raises:
        ValueError: If ``dataset_name`` matches none of the rules above.
    """
    if dataset_name == 'synthetic':
        gene_dataset = SyntheticDataset()
    elif dataset_name == 'cortex':
        gene_dataset = CortexDataset()
    elif dataset_name == 'brain_large':
        gene_dataset = BrainLargeDataset(save_path=save_path)
    elif dataset_name == 'retina':
        gene_dataset = RetinaDataset(save_path=save_path)
    elif dataset_name == 'cbmc':
        gene_dataset = CbmcDataset(save_path=save_path)
    elif dataset_name == 'brain_small':
        gene_dataset = BrainSmallDataset(save_path=save_path)
    elif dataset_name == 'hemato':
        # This branch ignores ``save_path`` and uses a fixed directory.
        gene_dataset = HematoDataset(save_path='data/HEMATO/')
    elif dataset_name == 'pbmc':
        gene_dataset = PbmcDataset(save_path=save_path)
    elif dataset_name[-5:] == ".loom":
        gene_dataset = LoomDataset(filename=dataset_name, save_path=save_path,
                                   url=url)
    elif dataset_name[-5:] == ".h5ad":
        gene_dataset = AnnDataset(dataset_name, save_path=save_path, url=url)
    elif ".csv" in dataset_name:
        gene_dataset = CsvDataset(dataset_name, save_path=save_path)
    else:
        # Raising a bare string is a TypeError in Python 3 ("exceptions must
        # derive from BaseException"), which hid the intended message.
        raise ValueError("No such dataset available")
    return gene_dataset
def load_datasets(dataset_name, save_path="data/", url=None):
    """Build and return the gene dataset identified by ``dataset_name``.

    Exact built-in names are tried first, in a fixed order. If none matches,
    the name is treated as a file name: a ``.loom`` or ``.h5ad`` ending
    selects ``LoomDataset`` or ``AnnDataset`` (both receive ``url``), and a
    name containing ``.csv`` selects ``CsvDataset``.

    Args:
        dataset_name: Built-in dataset name or data file name.
        save_path: Directory passed to the constructors that take one. The
            ``"hemato"`` entry uses the fixed path ``"data/HEMATO/"`` instead.
        url: Passed through to ``LoomDataset`` / ``AnnDataset`` only.

    Returns:
        The constructed dataset object.

    Raises:
        Exception: If no rule matches ``dataset_name``.
    """
    # Each constructor sits behind a lambda so that only the selected one is
    # ever called; entries are compared with ``==`` in the listed order.
    named_factories = (
        ("synthetic", lambda: SyntheticDataset()),
        ("cortex", lambda: CortexDataset()),
        ("brain_large", lambda: BrainLargeDataset(save_path=save_path)),
        ("retina", lambda: RetinaDataset(save_path=save_path)),
        ("cbmc", lambda: CbmcDataset(save_path=save_path)),
        ("brain_small", lambda: BrainSmallDataset(save_path=save_path)),
        ("hemato", lambda: HematoDataset(save_path="data/HEMATO/")),
        ("pbmc", lambda: PbmcDataset(save_path=save_path)),
    )
    for known_name, build in named_factories:
        if dataset_name == known_name:
            return build()

    # Not a built-in name: dispatch on the file-name shape.
    tail = dataset_name[-5:]
    if tail == ".loom":
        return LoomDataset(filename=dataset_name, save_path=save_path, url=url)
    if tail == ".h5ad":
        return AnnDataset(dataset_name, save_path=save_path, url=url)
    if ".csv" in dataset_name:
        return CsvDataset(dataset_name, save_path=save_path)

    raise Exception("No such dataset available")
def read_Hemato(override=False, verbose=False):
    """Return the HEMATO data as a read-only ``Dataset``, exporting it first if needed.

    The preprocessed directory is ``HEMATO_preprocessed`` under ``DATA_DIR``.
    If it holds no ``X`` entry, the data are loaded through scVI's
    ``HematoDataset``, written out with ``_save_data_to_path``, and three
    extra pickles are added: ``labels_name`` (the two class names),
    ``labels_bin`` (argmax over meta columns 1 and 2) and ``labels_val``
    (``labels`` min-max rescaled to ``[-1, 1]``).

    Args:
        override: If true, delete and recreate the preprocessed directory
            before the existence check, which forces a fresh export.
        verbose: Forwarded to ``_save_data_to_path``.

    Returns:
        ``Dataset(preprocessed_path, read_only=True)``.

    Raises:
        RuntimeError: If ``scvi.dataset`` cannot be imported when an export
            is required.
        AssertionError: If the gene or label name counts do not match the
            corresponding matrix widths.
    """
    out_dir = select_path(os.path.join(DATA_DIR, 'HEMATO_preprocessed'),
                          create_new=True)
    if override:
        # Wipe the directory so the export below is triggered again.
        shutil.rmtree(out_dir)
        os.mkdir(out_dir)

    # ====== copy the dataset from scVI ====== #
    already_exported = os.path.exists(os.path.join(out_dir, 'X'))
    if not already_exported:
        try:
            from scvi.dataset import HematoDataset
        except ImportError:
            raise RuntimeError("Require `scVI` package for HEMATO dataset")
        scvi_data = HematoDataset(
            save_path=os.path.join(DOWNLOAD_DIR, 'HEMATO/'))

        counts = scvi_data._X
        genes = np.array(scvi_data.gene_names)
        assert len(genes) == counts.shape[1]

        # Column 0 of the meta table is skipped; the rest are the targets.
        targets = scvi_data.meta.values[:, 1:]
        level_names = np.array(scvi_data.cell_types_levels)
        assert len(level_names) == targets.shape[1]

        cells = np.array(['Cell#%d' % idx for idx in range(counts.shape[0])])
        _save_data_to_path(out_dir, counts, targets, genes, level_names,
                           cells, verbose)

        # ====== extra binary / continuous labels for testing ====== #
        raw_labels = scvi_data.labels
        lowest = np.min(raw_labels)
        highest = np.max(raw_labels)
        scaled = 2 * (raw_labels - lowest) / (highest - lowest) - 1

        meta = scvi_data.meta
        er_column = meta.iloc[:, 1].values[:, None]  # Er
        gr_column = meta.iloc[:, 2].values[:, None]  # Gr
        binary = np.argmax(np.hstack((er_column, gr_column)), axis=-1)

        # Written in this order: names, binary labels, continuous labels.
        extra_files = (
            ('labels_name', np.array(["Erythroblasts", "Granulocytes"])),
            ('labels_bin', binary),
            ('labels_val', scaled),
        )
        for file_name, payload in extra_files:
            with open(os.path.join(out_dir, file_name), 'wb') as handle:
                pickle.dump(payload, handle)

    # ====== read preprocessed data ====== #
    return Dataset(out_dir, read_only=True)
def test_populate(self):
    """Build ``HematoDataset`` under ``tests/data/HEMATO`` and pass it to
    ``unsupervised_training_one_epoch``."""
    unsupervised_training_one_epoch(
        HematoDataset(save_path="tests/data/HEMATO")
    )
def test_hemato():
    """Build ``HematoDataset`` under ``tests/data/HEMATO/`` and hand it to
    ``base_benchmark``."""
    base_benchmark(HematoDataset(save_path='tests/data/HEMATO/'))
def test_hemato(save_path):
    """Build ``HematoDataset`` in the ``HEMATO/`` subdirectory of
    ``save_path`` and hand it to ``base_benchmark``."""
    hemato_dir = os.path.join(save_path, 'HEMATO/')
    base_benchmark(HematoDataset(save_path=hemato_dir))