def setUpClass(cls):
    """Create the promoter and enhancer ``DataTpm`` fixtures used by the tests.

    Both objects are built from the "parsed" file with ``nrows=4`` and are
    then passed through ``make_data_celltype_specific`` using
    ``cls.celltype_analyse``.
    """
    super().setUpClass()
    fixtures = (
        ("database_promoters", {"file": "parsed", "nrows": 4}),
        ("database_enhancers", {"file": "parsed", "nrows": 4, "data_type": "enhancers"}),
    )
    for attribute, options in fixtures:
        database = internals.DataTpm(**options)
        # Publish on the class first, then apply the cell-type step.
        setattr(cls, attribute, database)
        database.make_data_celltype_specific(cls.celltype_analyse)
def setUpClass(cls):
    """Prepare promoter and enhancer fixtures with donors merged.

    Each ``DataTpm`` is created with ``nrows=10`` from the file named in
    ``cv``, made specific to ``cls.celltype_analyse`` and finally passed
    through ``merge_donors_primary``.
    """
    super().setUpClass()

    # Promoters
    promoters = cls.database_promoters = internals.DataTpm(
        file=cv.promoter_file_name, nrows=10
    )
    promoters.make_data_celltype_specific(cls.celltype_analyse)
    promoters.merge_donors_primary()

    # Enhancers
    enhancers = cls.database_enhancers = internals.DataTpm(
        file=cv.enhancer_file_name, nrows=10, data_type="enhancers"
    )
    enhancers.make_data_celltype_specific(cls.celltype_analyse)
    enhancers.merge_donors_primary()
def setUpClass(cls):
    """Prepare promoter and enhancer fixtures filtered by element sparseness.

    Promoters are created with ``nrows=10`` and enhancers with ``nrows=100``.
    Both are made cell-type specific, have their donors merged and are then
    filtered with ``filter_by_reg_element_sparseness(threshold=50)``.
    """
    super().setUpClass()

    # Promoters
    promoters = cls.database_promoters = internals.DataTpm(
        file=cv.promoter_file_name, nrows=10
    )
    promoters.make_data_celltype_specific(cls.celltype_analyse)
    promoters.merge_donors_primary()
    promoters.filter_by_reg_element_sparseness(threshold=50)

    # Enhancers
    enhancers = cls.database_enhancers = internals.DataTpm(
        file=cv.enhancer_file_name, nrows=100, data_type="enhancers"
    )
    enhancers.make_data_celltype_specific(cls.celltype_analyse)
    enhancers.merge_donors_primary()
    enhancers.filter_by_reg_element_sparseness(threshold=50)
def setUpClass(cls):
    """Prepare promoter and enhancer fixtures with activity thresholds applied.

    After the cell-type and donor-merging steps, promoters use a target
    activity threshold of 1 and a non-target inactivity threshold of 0.3;
    enhancers use 0.15 and 0 respectively.
    """
    super().setUpClass()

    # Promoters
    promoters = cls.database_promoters = internals.DataTpm(
        file=cv.promoter_file_name, nrows=10
    )
    promoters.make_data_celltype_specific(cls.celltype_analyse)
    promoters.merge_donors_primary()
    promoters.filter_by_target_celltype_activity(threshold=1)
    promoters.define_non_target_celltypes_inactivity(threshold=0.3)

    # Enhancers
    enhancers = cls.database_enhancers = internals.DataTpm(
        file=cv.enhancer_file_name, nrows=10, data_type="enhancers"
    )
    enhancers.make_data_celltype_specific(cls.celltype_analyse)
    enhancers.merge_donors_primary()
    enhancers.filter_by_target_celltype_activity(threshold=0.15)
    enhancers.define_non_target_celltypes_inactivity(threshold=0)
def setUpClass(cls):
    """Build the shared "Bronchial Epithelial Cell" fixture from the parsed file.

    The ``DataTpm`` object is created with ``nrows=None``, made cell-type
    specific, filtered by target activity (threshold 1), given a non-target
    inactivity threshold of 0 and finally sorted with ``sort_sparseness``.
    """
    cls.celltype_analyse = "Bronchial Epithelial Cell"
    tpm = cls.data = internals.DataTpm(file="parsed", nrows=None)
    tpm.make_data_celltype_specific(cls.celltype_analyse)
    tpm.filter_by_target_celltype_activity(threshold=1)
    tpm.define_non_target_celltypes_inactivity(threshold=0)
    tpm.sort_sparseness()
def _prepare_data_raw(self, sample_type):
    """Return a cell-type-specific ``DataTpm`` built from the raw file.

    The file name comes from ``self._get_re_file_name()``; ``sample_type``
    is forwarded as ``sample_types`` and ``self.data_type`` as ``data_type``.
    """
    tpm = internals.DataTpm(
        file=self._get_re_file_name(),
        sample_types=sample_type,
        data_type=self.data_type,
    )
    tpm.make_data_celltype_specific(self.cell_type)
    return tpm
def _prepare_data_parsed(self, sample_type, thresholds):
    """Return filtered, cell-type-specific data built from the "parsed" file.

    ``sample_type`` is forwarded as ``sample_types``; ``thresholds`` is handed
    unchanged to ``self._filters``, whose result is returned.
    """
    tpm = internals.DataTpm(
        file="parsed", sample_types=sample_type, data_type=self.data_type
    )
    tpm.make_data_celltype_specific(self.cell_type)
    return self._filters(tpm, thresholds)
def setUpClass(cls):
    """Build a "Hepatocyte" promoter fixture and a ``Vencodes`` object on top of it.

    The data is read from the "parsed" file (primary cells, promoters,
    ``nrows=20000``), filtered, sorted by sparseness, and then wrapped in
    ``internals.Vencodes`` (heuristic algorithm, ``number_of_re=4``,
    ``stop=3``), on which ``next(amount=2)`` is called once.
    """
    tpm = cls.data = internals.DataTpm(
        file="parsed",
        sample_types="primary cells",
        data_type="promoters",
        nrows=20000,
    )
    tpm.make_data_celltype_specific("Hepatocyte")
    tpm.filter_by_target_celltype_activity(threshold=1)
    tpm.filter_by_reg_element_sparseness(threshold=0)
    tpm.define_non_target_celltypes_inactivity(threshold=0)
    tpm.sort_sparseness()

    finder = cls.vencodes = internals.Vencodes(
        cls.data, algorithm="heuristic", number_of_re=4, stop=3
    )
    finder.next(amount=2)
def setUpClass(cls):
    """Create a base ``DataTpm`` fixture plus three deep copies of it.

    ``data2`` has its ``sample_type`` set to "test", ``data3`` has the cell
    at ``iloc[0, 0]`` of its ``data`` set to 3, and ``data4`` is left
    untouched.
    """
    super().setUpClass()
    base = cls.data = internals.DataTpm(file="parsed", nrows=4)
    base.make_data_celltype_specific(cls.celltype_analyse)

    # Copy with a different sample type
    renamed = cls.data2 = base.copy(deep=True)
    renamed.sample_type = "test"

    # Copy with one modified value
    modified = cls.data3 = base.copy(deep=True)
    modified.data.iloc[0, 0] = 3

    # Unmodified copy
    cls.data4 = base.copy(deep=True)
def _prepare_data_raw_adding_ctp(self, sample_type, thresholds):
    """Return filtered data after adding ``self.cell_type`` to primary-cell data.

    Primary-cell data is loaded from the raw file and its donors merged with
    ``exclude_target=False``. The target cell type is then added from the
    same file using ``sample_type``, the data is made cell-type specific, and
    the result of ``self._filters(data, thresholds)`` is returned.
    """
    source = self._get_re_file_name()
    tpm = internals.DataTpm(
        file=source, sample_types="primary cells", data_type=self.data_type
    )
    tpm.merge_donors_primary(exclude_target=False)
    tpm.add_celltype(
        self.cell_type,
        file=source,
        sample_types=sample_type,
        data_type=self.data_type,
    )
    tpm.make_data_celltype_specific(self.cell_type)
    return self._filters(tpm, thresholds)
def _data_raw_cleaner(self):
    """Populate ``self.data`` with validated primary-cell data plus the target cell type.

    A plain ``DataTpm`` is loaded with the sample type from ``self.set_up``
    and used as the source for ``add_celltype``. ``self.data`` is a
    ``DataTpmValidated`` built with ``self.validate_with``; its donors are
    merged with ``exclude_target=False``, it is made cell-type specific and
    it is filtered by ``self.set_up.target_celltype_activity``.
    """
    config = self.set_up
    donor_source = internals.DataTpm(
        file=config.file_name,
        sample_types=config.type,
        data_type=config.data_type,
    )
    validated = self.data = internals.DataTpmValidated(
        self.validate_with,
        file=config.file_name,
        sample_types="primary cells",
        data_type=config.data_type,
    )
    validated.merge_donors_primary(exclude_target=False)
    validated.add_celltype(
        config.cell_type, file=donor_source, data_type=config.data_type
    )
    validated.make_data_celltype_specific(config.cell_type)
    validated.filter_by_target_celltype_activity(
        threshold=config.target_celltype_activity
    )
def setUpClass(cls):
    """Create the base promoter fixture and the copies used by the add/remove tests.

    ``cage_primary`` is loaded with ``nrows=20``. ``cage_cancer``,
    ``cage_tissue`` and ``cage_primary_rescue`` are deep copies of it;
    the cancer copy gets a cell line added, and the rescue copy has
    "Keratocytes" removed and then added back.
    """
    super().setUpClass()

    # Base data set
    base = cls.cage_primary = internals.DataTpm(file=cv.promoter_file_name, nrows=20)

    # One independent copy per scenario
    cancer = cls.cage_cancer = base.copy(deep=True)
    cls.cage_tissue = base.copy(deep=True)
    rescue = cls.cage_primary_rescue = base.copy(deep=True)

    # Scenario: add a cancer cell type
    cancer.add_celltype(
        "small cell lung carcinoma cell line",
        file=cv.promoter_file_name,
        sample_types="cell lines",
        data_type="promoters",
    )

    # Scenario: add a tissue cell type (currently disabled)
    # cls.cage_tissue.add_celltype("pituitary gland", file=cv.promoter_file_name,
    #                              sample_types="tissues", data_type="promoters")

    # Scenario: add a primary cell type back after removing it from the data set
    rescue.remove_celltype("Keratocytes", merged=False)
    rescue.add_celltype(
        "Keratocytes",
        file=cv.promoter_file_name,
        sample_types="primary cells",
        data_type="promoters",
    )
def test_filename(self):
    """Check that a ``DataTpm`` built from the promoter file name points at an existing file."""
    database = internals.DataTpm(file=cv.promoter_file_name, nrows=4)
    path = database._file_path
    # assertTrue states the intent directly; the message names the path on failure.
    self.assertTrue(os.path.isfile(path), msg="File not found: {}".format(path))
def test_custom(self):
    """Check that a ``DataTpm`` built with ``file="custom"`` points at an existing file."""
    database = internals.DataTpm(file="custom", nrows=4)
    path = database._file_path
    # assertTrue states the intent directly; the message names the path on failure.
    self.assertTrue(os.path.isfile(path), msg="File not found: {}".format(path))
def setUpClass(cls):
    """Build the shared "Hepatocyte" fixture from the parsed file with ``nrows=None``."""
    cls.celltype_analyse = "Hepatocyte"
    tpm = cls.data = internals.DataTpm(file="parsed", nrows=None)
    tpm.make_data_celltype_specific(cls.celltype_analyse)
def setUp(self):
    """Create a fresh cell-type-specific ``DataTpm`` and record its column names.

    ``self.cols`` holds the column labels of ``self.cage_tpm.data`` as a list.
    """
    tpm = self.cage_tpm = internals.DataTpm(file="parsed", nrows=4)
    tpm.make_data_celltype_specific(self.celltype_analyse)
    self.cols = tpm.data.columns.tolist()
def setUpClass(cls):
    """Create ``cls.data`` from the "parsed" file (``nrows=4``) for ``cls.celltype_analyse``."""
    super().setUpClass()
    tpm = cls.data = internals.DataTpm(file="parsed", nrows=4)
    tpm.make_data_celltype_specific(cls.celltype_analyse)
def setUpClass(cls):
    """Create unprocessed promoter and enhancer fixtures, each with ``nrows=4``."""
    super().setUpClass()

    promoter_options = {"file": cv.promoter_file_name, "nrows": 4}
    cls.database_promoters = internals.DataTpm(**promoter_options)

    enhancer_options = {
        "file": cv.enhancer_file_name,
        "nrows": 4,
        "data_type": "enhancers",
    }
    cls.database_enhancers = internals.DataTpm(**enhancer_options)
def test_parsed(self):
    """Check that a cell-type-specific ``DataTpm`` built from "parsed" points at an existing file."""
    database = internals.DataTpm(file="parsed", nrows=4)
    database.make_data_celltype_specific(self.celltype_analyse)
    path = database._file_path
    # assertTrue states the intent directly; the message names the path on failure.
    self.assertTrue(os.path.isfile(path), msg="File not found: {}".format(path))
class SetUp: """set up some variables: """ data_type = "promoters" if data_type == "enhancers": file_name = enhancer_file_name else: file_name = promoter_file_name # Now you don't need to change anything else setup = SetUp() data = internals.DataTpm(file=setup.file_name, sample_types="primary cells", data_type=setup.data_type) data.merge_donors_primary(exclude_target=False) data_original = data.copy() data_cancer = internals.DataTpm(file=setup.file_name, sample_types="cell lines", data_type=setup.data_type) for celltype in tqdm(cancer_celltype_list, desc="Completed: "): data.add_celltype(celltype, file=data_cancer) data.make_data_celltype_specific(celltype) data.filter_by_target_celltype_activity(threshold=0.0001, binarize=False) if isinstance(celltype, dict): celltype = list(celltype.keys())[0] celltype = gen_utils.str_replace_multi(celltype, {
second_data_type = "promoters" algorithm = "heuristic" target_celltype_activity = 0.1 reg_element_sparseness = 0 non_target_celltypes_inactivity = 0 second_target_celltype_activity = 0.5 second_reg_element_sparseness = 0 second_non_target_celltypes_inactivity = 0 # Now you don't need to change anything else setup = SetUp() results_final = {} data = internals.DataTpm(file="parsed", sample_types="primary cells", data_type=setup.data_type) for celltype in tqdm(setup.re_list, desc="Completed: "): data.make_data_celltype_specific(celltype) data_copy = data.copy() for k in [1, 2]: results_celltype = [] for n in range(50): choice = random.sample(range(3), k=k) # chooses a random int from 0 to 2, to later choose a donor. data.filter_by_target_celltype_activity(threshold=setup.target_celltype_activity, donors=choice) data.filter_by_reg_element_sparseness(threshold=setup.reg_element_sparseness) data.define_non_target_celltypes_inactivity(threshold=setup.non_target_celltypes_inactivity) if setup.algorithm != "sampling": data.sort_sparseness() if setup.algorithm == "sampling":
algorithm = "heuristic" if setup.celltype_type == "primary": celltype_list = primary_cell_list sample_types = "primary cells" elif setup.celltype_type == "cancer": celltype_list = cancer_celltype_list sample_types = "cell lines" else: raise AttributeError("Celltype_type - {} - currently not supported".format( setup.celltype_type)) results = {} data = internals.DataTpm(file="parsed", sample_types=sample_types, data_type=setup.first_data_type) data_second = internals.DataTpm(file="parsed", sample_types=sample_types, data_type=setup.second_data_type) # cycle your list of cell types: for celltype in tqdm(celltype_list, desc="Completed: "): # prepare first data: data.make_data_celltype_specific(celltype) data.filter_by_target_celltype_activity( threshold=setup.target_celltype_activity) data.filter_by_reg_element_sparseness( threshold=setup.reg_element_sparseness) data.define_non_target_celltypes_inactivity( threshold=setup.non_target_celltypes_inactivity)
def setUp(self):
    """Create a fresh ``cage_primary`` fixture from the "parsed" file for each test."""
    tpm = self.cage_primary = internals.DataTpm(file="parsed", nrows=4)
    tpm.make_data_celltype_specific(self.celltype_analyse)
def setUp(self):
    """Create a fresh ``cage_primary`` fixture and the list of element names used by the tests."""
    tpm = self.cage_primary = internals.DataTpm(file="parsed", nrows=4)
    tpm.make_data_celltype_specific(self.celltype_analyse)
    self.elements = [
        'chr10:100027943..100027958,-',
        'chr10:100174900..100174956,-',
    ]
# Now you don't need to change anything else setup = SetUp() if setup.celltype_type == "cancer": sample_types = "cell lines" elif setup.celltype_type == "primary": sample_types = "primary cells" else: raise AttributeError("Celltype_type - {} - currently not supported".format( setup.celltype_type)) results_final = {} data = internals.DataTpm(file="parsed", sample_types=sample_types, data_type=setup.data_type) # cycle your list of cell types: for celltype in tqdm(setup.celltypes_list, desc="Completed: "): data.make_data_celltype_specific(celltype) data_copy = data.copy() # Deal with possible dictionaries in celltype list: if isinstance(celltype, dict): celltype = list(celltype.keys())[0] # cycle possible number of combinations of donors: donors_number = len(data.ctp_analyse_donors[celltype]) for k in range(1, donors_number): results_celltype = []