Exemplo n.º 1
0
 def setUpClass(cls):
     """Create promoter and enhancer fixtures from the "parsed" file.

     Both ``DataTpm`` objects are built with ``nrows=4`` and then made
     specific to ``cls.celltype_analyse``.
     """
     super().setUpClass()
     common = {"file": "parsed", "nrows": 4}

     promoters = internals.DataTpm(**common)
     cls.database_promoters = promoters
     promoters.make_data_celltype_specific(cls.celltype_analyse)

     enhancers = internals.DataTpm(**common, data_type="enhancers")
     cls.database_enhancers = enhancers
     enhancers.make_data_celltype_specific(cls.celltype_analyse)
Exemplo n.º 2
0
 def setUpClass(cls):
     """Create promoter and enhancer fixtures from the configured files.

     Each ``DataTpm`` is built with ``nrows=10``, made specific to
     ``cls.celltype_analyse`` and then has ``merge_donors_primary()`` applied.
     """
     super().setUpClass()

     promoters = internals.DataTpm(file=cv.promoter_file_name, nrows=10)
     cls.database_promoters = promoters
     promoters.make_data_celltype_specific(cls.celltype_analyse)
     promoters.merge_donors_primary()

     enhancers = internals.DataTpm(
         file=cv.enhancer_file_name, nrows=10, data_type="enhancers")
     cls.database_enhancers = enhancers
     enhancers.make_data_celltype_specific(cls.celltype_analyse)
     enhancers.merge_donors_primary()
Exemplo n.º 3
0
 def setUpClass(cls):
     """Create promoter and enhancer fixtures filtered by sparseness.

     Each ``DataTpm`` is made specific to ``cls.celltype_analyse``, has
     ``merge_donors_primary()`` applied and is then passed through
     ``filter_by_reg_element_sparseness(threshold=50)``.
     """
     super().setUpClass()
     sparseness_threshold = 50

     promoters = internals.DataTpm(file=cv.promoter_file_name, nrows=10)
     cls.database_promoters = promoters
     promoters.make_data_celltype_specific(cls.celltype_analyse)
     promoters.merge_donors_primary()
     promoters.filter_by_reg_element_sparseness(threshold=sparseness_threshold)

     # NOTE(review): enhancers are read with nrows=100 while promoters use
     # nrows=10 -- confirm the difference is intentional.
     enhancers = internals.DataTpm(
         file=cv.enhancer_file_name, nrows=100, data_type="enhancers")
     cls.database_enhancers = enhancers
     enhancers.make_data_celltype_specific(cls.celltype_analyse)
     enhancers.merge_donors_primary()
     enhancers.filter_by_reg_element_sparseness(threshold=sparseness_threshold)
Exemplo n.º 4
0
 def setUpClass(cls):
     """Create promoter and enhancer fixtures with activity filters applied.

     Each ``DataTpm`` is built with ``nrows=10``, made specific to
     ``cls.celltype_analyse``, has ``merge_donors_primary()`` applied, and is
     then passed through ``filter_by_target_celltype_activity`` and
     ``define_non_target_celltypes_inactivity``. Thresholds differ between
     the two data types (promoters: 1 and 0.3; enhancers: 0.15 and 0).
     """
     super().setUpClass()

     promoters = internals.DataTpm(file=cv.promoter_file_name, nrows=10)
     cls.database_promoters = promoters
     promoters.make_data_celltype_specific(cls.celltype_analyse)
     promoters.merge_donors_primary()
     promoters.filter_by_target_celltype_activity(threshold=1)
     promoters.define_non_target_celltypes_inactivity(threshold=0.3)

     enhancers = internals.DataTpm(
         file=cv.enhancer_file_name, nrows=10, data_type="enhancers")
     cls.database_enhancers = enhancers
     enhancers.make_data_celltype_specific(cls.celltype_analyse)
     enhancers.merge_donors_primary()
     enhancers.filter_by_target_celltype_activity(threshold=0.15)
     enhancers.define_non_target_celltypes_inactivity(threshold=0)
Exemplo n.º 5
0
 def setUpClass(cls):
     """Prepare "parsed" data for the "Bronchial Epithelial Cell" cell type.

     The ``DataTpm`` is built with ``nrows=None``, made cell type specific,
     filtered by target activity (threshold 1), has non-target inactivity
     defined (threshold 0) and is finally sorted with ``sort_sparseness()``.
     """
     cls.celltype_analyse = "Bronchial Epithelial Cell"

     dataset = internals.DataTpm(file="parsed", nrows=None)
     cls.data = dataset
     dataset.make_data_celltype_specific(cls.celltype_analyse)
     dataset.filter_by_target_celltype_activity(threshold=1)
     dataset.define_non_target_celltypes_inactivity(threshold=0)
     dataset.sort_sparseness()
Exemplo n.º 6
0
 def _prepare_data_raw(self, sample_type):
     """Build a ``DataTpm`` from the raw file and make it cell type specific.

     Args:
         sample_type: forwarded to ``DataTpm`` as ``sample_types``.

     Returns:
         The ``DataTpm`` object after ``make_data_celltype_specific`` was
         called with ``self.cell_type``.
     """
     raw_data = internals.DataTpm(
         file=self._get_re_file_name(),
         sample_types=sample_type,
         data_type=self.data_type,
     )
     raw_data.make_data_celltype_specific(self.cell_type)
     return raw_data
Exemplo n.º 7
0
 def _prepare_data_parsed(self, sample_type, thresholds):
     """Build a ``DataTpm`` from the "parsed" file and apply the filters.

     Args:
         sample_type: forwarded to ``DataTpm`` as ``sample_types``.
         thresholds: forwarded unchanged to ``self._filters``.

     Returns:
         Whatever ``self._filters`` returns for the cell type specific data.
     """
     parsed = internals.DataTpm(
         file="parsed",
         sample_types=sample_type,
         data_type=self.data_type,
     )
     parsed.make_data_celltype_specific(self.cell_type)
     return self._filters(parsed, thresholds)
Exemplo n.º 8
0
 def setUpClass(cls):
     """Prepare Hepatocyte promoter data and a ``Vencodes`` object on top of it.

     The data goes through the activity, sparseness and inactivity filters
     and ``sort_sparseness()`` before ``Vencodes`` is created with the
     "heuristic" algorithm; ``next(amount=2)`` is then called once.
     """
     dataset = internals.DataTpm(
         file="parsed",
         sample_types="primary cells",
         data_type="promoters",
         nrows=20000,
     )
     cls.data = dataset
     dataset.make_data_celltype_specific("Hepatocyte")
     dataset.filter_by_target_celltype_activity(threshold=1)
     dataset.filter_by_reg_element_sparseness(threshold=0)
     dataset.define_non_target_celltypes_inactivity(threshold=0)
     dataset.sort_sparseness()

     finder = internals.Vencodes(
         dataset, algorithm="heuristic", number_of_re=4, stop=3)
     cls.vencodes = finder
     finder.next(amount=2)
Exemplo n.º 9
0
 def setUpClass(cls):
     """Create a reference data set plus three deep copies of it.

     * ``data``  -- "parsed" file, ``nrows=4``, made cell type specific.
     * ``data2`` -- deep copy whose ``sample_type`` is set to "test".
     * ``data3`` -- deep copy whose ``data.iloc[0, 0]`` is set to 3.
     * ``data4`` -- deep copy left unmodified.
     """
     super().setUpClass()

     reference = internals.DataTpm(file="parsed", nrows=4)
     cls.data = reference
     reference.make_data_celltype_specific(cls.celltype_analyse)

     cls.data2 = other_sample_type = reference.copy(deep=True)
     other_sample_type.sample_type = "test"

     cls.data3 = other_values = reference.copy(deep=True)
     other_values.data.iloc[0, 0] = 3

     cls.data4 = reference.copy(deep=True)
Exemplo n.º 10
0
 def _prepare_data_raw_adding_ctp(self, sample_type, thresholds):
     """Build primary-cell data, add ``self.cell_type`` to it and filter.

     The base ``DataTpm`` always uses ``sample_types="primary cells"``;
     ``sample_type`` is only used when adding the cell type.

     Args:
         sample_type: forwarded to ``add_celltype`` as ``sample_types``.
         thresholds: forwarded unchanged to ``self._filters``.

     Returns:
         Whatever ``self._filters`` returns for the prepared data.
     """
     re_file = self._get_re_file_name()
     dataset = internals.DataTpm(
         file=re_file,
         sample_types="primary cells",
         data_type=self.data_type,
     )
     dataset.merge_donors_primary(exclude_target=False)
     dataset.add_celltype(
         self.cell_type,
         file=re_file,
         sample_types=sample_type,
         data_type=self.data_type,
     )
     dataset.make_data_celltype_specific(self.cell_type)
     return self._filters(dataset, thresholds)
Exemplo n.º 11
0
    def _data_raw_cleaner(self):
        """Populate ``self.data`` with validated data for the configured cell type.

        A separate ``DataTpm`` is loaded with the configured sample type and
        handed to ``add_celltype`` as its ``file`` argument; the main
        ``DataTpmValidated`` object is loaded from "primary cells".
        """
        config = self.set_up

        celltype_source = internals.DataTpm(
            file=config.file_name,
            sample_types=config.type,
            data_type=config.data_type,
        )

        validated = internals.DataTpmValidated(
            self.validate_with,
            file=config.file_name,
            sample_types="primary cells",
            data_type=config.data_type,
        )
        self.data = validated
        validated.merge_donors_primary(exclude_target=False)
        validated.add_celltype(
            config.cell_type,
            file=celltype_source,
            data_type=config.data_type,
        )
        validated.make_data_celltype_specific(config.cell_type)
        validated.filter_by_target_celltype_activity(
            threshold=config.target_celltype_activity)
Exemplo n.º 12
0
 def setUpClass(cls):
     """Create a base promoter data set and one deep copy per test scenario.

     ``cage_cancer`` gets a cell line added, ``cage_primary_rescue`` has
     "Keratocytes" removed and added back, and ``cage_tissue`` is left as a
     plain copy because its ``add_celltype`` call is commented out.
     """
     super().setUpClass()
     promoter_file = cv.promoter_file_name

     # Base data set; every scenario below starts from a deep copy of it.
     cls.cage_primary = internals.DataTpm(file=promoter_file, nrows=20)
     for scenario in ("cage_cancer", "cage_tissue", "cage_primary_rescue"):
         setattr(cls, scenario, cls.cage_primary.copy(deep=True))

     # Scenario: add a cancer cell type taken from the "cell lines" samples.
     cls.cage_cancer.add_celltype(
         "small cell lung carcinoma cell line",
         file=promoter_file,
         sample_types="cell lines",
         data_type="promoters",
     )

     # Scenario (currently disabled): add a tissue cell type.
     # cls.cage_tissue.add_celltype("pituitary gland", file=cv.promoter_file_name,
     #                              sample_types="tissues", data_type="promoters")

     # Scenario: remove a primary cell type, then add it back.
     rescue = cls.cage_primary_rescue
     rescue.remove_celltype("Keratocytes", merged=False)
     rescue.add_celltype(
         "Keratocytes",
         file=promoter_file,
         sample_types="primary cells",
         data_type="promoters",
     )
Exemplo n.º 13
0
 def test_filename(self):
     """A ``DataTpm`` built from the promoter file name resolves to an existing file."""
     database = internals.DataTpm(file=cv.promoter_file_name, nrows=4)
     # assertTrue states the intent directly; the message names the offending
     # path so a failure is diagnosable without a debugger.
     self.assertTrue(
         os.path.isfile(database._file_path),
         msg="File not found: {}".format(database._file_path),
     )
Exemplo n.º 14
0
 def test_custom(self):
     """A ``DataTpm`` built with ``file="custom"`` resolves to an existing file."""
     database = internals.DataTpm(file="custom", nrows=4)
     # assertTrue states the intent directly; the message names the offending
     # path so a failure is diagnosable without a debugger.
     self.assertTrue(
         os.path.isfile(database._file_path),
         msg="File not found: {}".format(database._file_path),
     )
Exemplo n.º 15
0
 def setUpClass(cls):
     """Load the "parsed" data (``nrows=None``) and make it Hepatocyte specific."""
     cls.celltype_analyse = "Hepatocyte"
     dataset = internals.DataTpm(file="parsed", nrows=None)
     cls.data = dataset
     dataset.make_data_celltype_specific(cls.celltype_analyse)
Exemplo n.º 16
0
 def setUp(self):
     """Build a fresh cell type specific data set and cache its column names.

     ``self.cols`` holds the column labels of ``self.cage_tpm.data`` as a list.
     """
     cage_tpm = internals.DataTpm(file="parsed", nrows=4)
     self.cage_tpm = cage_tpm
     cage_tpm.make_data_celltype_specific(self.celltype_analyse)
     self.cols = cage_tpm.data.columns.tolist()
Exemplo n.º 17
0
 def setUpClass(cls):
     """Load "parsed" data with ``nrows=4`` and make it cell type specific."""
     super().setUpClass()
     dataset = internals.DataTpm(file="parsed", nrows=4)
     cls.data = dataset
     dataset.make_data_celltype_specific(cls.celltype_analyse)
Exemplo n.º 18
0
 def setUpClass(cls):
     """Load unprocessed promoter and enhancer data, both with ``nrows=4``."""
     super().setUpClass()
     row_limit = 4
     cls.database_promoters = internals.DataTpm(
         file=cv.promoter_file_name,
         nrows=row_limit,
     )
     cls.database_enhancers = internals.DataTpm(
         file=cv.enhancer_file_name,
         nrows=row_limit,
         data_type="enhancers",
     )
Exemplo n.º 19
0
 def test_parsed(self):
     """A cell type specific ``DataTpm`` built with ``file="parsed"`` resolves to an existing file."""
     database = internals.DataTpm(file="parsed", nrows=4)
     database.make_data_celltype_specific(self.celltype_analyse)
     # assertTrue states the intent directly; the message names the offending
     # path so a failure is diagnosable without a debugger.
     self.assertTrue(
         os.path.isfile(database._file_path),
         msg="File not found: {}".format(database._file_path),
     )
Exemplo n.º 20
0

class SetUp:
    """Script configuration: the data type and the file name that matches it."""
    data_type = "promoters"
    # Only the selected name is evaluated, exactly as with an if/else block.
    file_name = enhancer_file_name if data_type == "enhancers" else promoter_file_name


# Now you don't need to change anything else
setup = SetUp()

# Primary-cell data for the configured file and data type; donors are merged
# with exclude_target=False.
data = internals.DataTpm(file=setup.file_name,
                         sample_types="primary cells",
                         data_type=setup.data_type)
data.merge_donors_primary(exclude_target=False)
# Copy taken before the loop below starts adding cell types to `data`.
data_original = data.copy()
# Cell-line data; the loop below passes it to data.add_celltype() as `file`.
data_cancer = internals.DataTpm(file=setup.file_name,
                                sample_types="cell lines",
                                data_type=setup.data_type)

for celltype in tqdm(cancer_celltype_list, desc="Completed: "):
    data.add_celltype(celltype, file=data_cancer)
    data.make_data_celltype_specific(celltype)
    data.filter_by_target_celltype_activity(threshold=0.0001, binarize=False)

    if isinstance(celltype, dict):
        celltype = list(celltype.keys())[0]
    celltype = gen_utils.str_replace_multi(celltype, {
Exemplo n.º 21
0
    second_data_type = "promoters"
    algorithm = "heuristic"

    target_celltype_activity = 0.1
    reg_element_sparseness = 0
    non_target_celltypes_inactivity = 0

    second_target_celltype_activity = 0.5
    second_reg_element_sparseness = 0
    second_non_target_celltypes_inactivity = 0


# Now you don't need to change anything else
setup = SetUp()
# NOTE(review): presumably filled per cell type further down -- the code that
# writes to it is not visible in this excerpt.
results_final = {}
# "parsed" primary-cell data, created once; the loop below re-targets it with
# make_data_celltype_specific() for each cell type.
data = internals.DataTpm(file="parsed", sample_types="primary cells", data_type=setup.data_type)

for celltype in tqdm(setup.re_list, desc="Completed: "):
    data.make_data_celltype_specific(celltype)
    data_copy = data.copy()
    for k in [1, 2]:
        results_celltype = []
        for n in range(50):
            choice = random.sample(range(3), k=k)  # chooses a random int from 0 to 2, to later choose a donor.
            data.filter_by_target_celltype_activity(threshold=setup.target_celltype_activity, donors=choice)
            data.filter_by_reg_element_sparseness(threshold=setup.reg_element_sparseness)
            data.define_non_target_celltypes_inactivity(threshold=setup.non_target_celltypes_inactivity)
            if setup.algorithm != "sampling":
                data.sort_sparseness()

            if setup.algorithm == "sampling":
Exemplo n.º 22
0
algorithm = "heuristic"

# Pick the cell type list and the matching sample type for the configured
# cell type category.
if setup.celltype_type == "primary":
    celltype_list = primary_cell_list
    sample_types = "primary cells"
elif setup.celltype_type == "cancer":
    celltype_list = cancer_celltype_list
    sample_types = "cell lines"
else:
    # Any other category is rejected before data is loaded.
    raise AttributeError("Celltype_type - {} - currently not supported".format(
        setup.celltype_type))

results = {}
# Two "parsed" data sets with the same sample types, one for each configured
# data type (first and second).
data = internals.DataTpm(file="parsed",
                         sample_types=sample_types,
                         data_type=setup.first_data_type)
data_second = internals.DataTpm(file="parsed",
                                sample_types=sample_types,
                                data_type=setup.second_data_type)

# cycle your list of cell types:
for celltype in tqdm(celltype_list, desc="Completed: "):
    # prepare first data:
    data.make_data_celltype_specific(celltype)
    data.filter_by_target_celltype_activity(
        threshold=setup.target_celltype_activity)
    data.filter_by_reg_element_sparseness(
        threshold=setup.reg_element_sparseness)
    data.define_non_target_celltypes_inactivity(
        threshold=setup.non_target_celltypes_inactivity)
Exemplo n.º 23
0
 def setUp(self):
     """Build a fresh "parsed" data set (``nrows=4``) made cell type specific."""
     cage_primary = internals.DataTpm(file="parsed", nrows=4)
     self.cage_primary = cage_primary
     cage_primary.make_data_celltype_specific(self.celltype_analyse)
Exemplo n.º 24
0
 def setUp(self):
     """Build a fresh cell type specific data set and two element identifiers.

     ``self.elements`` holds two fixed element id strings used by the tests.
     """
     cage_primary = internals.DataTpm(file="parsed", nrows=4)
     self.cage_primary = cage_primary
     cage_primary.make_data_celltype_specific(self.celltype_analyse)
     self.elements = [
         'chr10:100027943..100027958,-',
         'chr10:100174900..100174956,-',
     ]
Exemplo n.º 25
0

# Now you don't need to change anything else
setup = SetUp()

# Translate the configured cell type category into the sample type that is
# passed to DataTpm.
if setup.celltype_type == "cancer":
    sample_types = "cell lines"
elif setup.celltype_type == "primary":
    sample_types = "primary cells"
else:
    # Any other category is rejected before data is loaded.
    raise AttributeError("Celltype_type - {} - currently not supported".format(
        setup.celltype_type))

results_final = {}
# "parsed" data, created once; the loop below re-targets it with
# make_data_celltype_specific() for each cell type.
data = internals.DataTpm(file="parsed",
                         sample_types=sample_types,
                         data_type=setup.data_type)

# cycle your list of cell types:
for celltype in tqdm(setup.celltypes_list, desc="Completed: "):
    data.make_data_celltype_specific(celltype)
    data_copy = data.copy()

    # Deal with possible dictionaries in celltype list:
    if isinstance(celltype, dict):
        celltype = list(celltype.keys())[0]

    # cycle possible number of combinations of donors:
    donors_number = len(data.ctp_analyse_donors[celltype])
    for k in range(1, donors_number):
        results_celltype = []