示例#1
0
    def initialize(self):
        """Populate this object's data attributes using the loader helpers.

        Sets, in order: ``excluded``, ``annotations_dict``,
        ``attributes_dict``, ``attributes_indexes`` and ``cells_dict``.
        The ``subset_*`` helpers are invoked only for their effect on
        ``self``; their return values are not used here.

        The call order matters: every helper receives ``self`` and may read
        attributes assigned by the preceding steps.
        """

        self.excluded = load_excluded(self)

        # Annotations: load, then let subset_annotations() act on self.
        self.annotations_dict = load_annotations_dict(self)
        subset_annotations(self)

        # Attributes: attributes_indexes is computed after attributes_dict is
        # set and before subset_attributes() runs.
        self.attributes_dict = load_attributes_dict(self)
        self.attributes_indexes = get_indexes(self)
        subset_attributes(self)
        # Cells are loaded last, after attributes_indexes is available.
        self.cells_dict = load_cells_dict(self)
        subset_cells(self)
示例#2
0
    def initialize(self):
        """Load and subset annotations, observables and cells when required.

        Nothing is done unless ``self.is_init`` is truthy. Annotation data is
        loaded only when ``self.annotations`` is not ``None``; cell data only
        when ``self.attributes.cells`` is not ``None``. Observables are always
        loaded once initialisation proceeds.
        """
        # Guard clause: skip all loading when initialisation is not requested.
        if not self.is_init:
            return

        if self.annotations is not None:
            self.excluded = load_excluded(self)
            self.annotations_dict = load_annotations_dict(self)
            subset_annotations(self)

        # Observables: plain dict, categorical dict and the categorical flags
        # are set before the indexes are computed and the subset is applied.
        self.observables_dict = load_observables_dict(self)
        self.observables_categorical_dict = load_observables_categorical_dict(self)
        self.is_observables_categorical = get_is_observables_categorical(self)
        self.attributes_indexes = get_indexes(self)
        subset_observables(self)

        if self.attributes.cells is not None:
            self.cells_dict = load_cells_dict(self)
            subset_cells(self)
示例#3
0
 def test_subset_cells(self):
     """Check the 'CD8T' cell values left after subsetting to five indexes."""
     cfg = self.config
     cfg.cells_dict = load_cells_dict(cfg)
     cfg.attributes_indexes = list(range(5))

     subset_cells(cfg)

     expected_cd8t = [0, 0, 0.006011666, 0, 0]
     self.assertEqual(cfg.cells_dict['CD8T'], expected_cd8t)
示例#4
0
def load_betas_adj(config):
    """Load cached adjusted betas, or compute and cache them.

    On a cache hit (both the pickle dict and the ``.npz`` data file exist)
    ``config.betas_adj_dict`` and ``config.betas_adj_data`` are read from
    disk. Otherwise the betas are loaded through ``load_betas``, each row is
    regressed (``sm.OLS``) on the exogenous variables selected by
    ``config.experiment.data_params`` and the stored value is
    ``residual + mean(row)``. The result is written to the cache files and
    ``config.betas_data`` is deleted.

    Args:
        config: project configuration object. Reads
            ``config.experiment.data_params`` and, on a cache miss,
            ``config.betas_dict`` / ``config.betas_data``.

    Raises:
        ValueError: if ``config.experiment.data_params`` is empty, or if a
            requested cell / observable type is not available.

    NOTE(review): on a cache miss ``config.experiment.data_params`` is left
    as ``{}`` when the function returns; on a cache hit it is untouched.
    Confirm whether callers rely on this before changing it.
    """
    fn_dict = get_data_base_path(config) + '/' + 'betas_adj_dict.pkl'

    suffix = ''
    if bool(config.experiment.data_params):
        # Keep a reference to the original params: the attribute is rebound
        # to an empty dict below, before the exog variables are selected.
        data_params = config.experiment.data_params
        suffix += '_' + config.experiment.get_data_params_str()
    else:
        raise ValueError('Exog for residuals is empty.')

    fn_data = get_data_base_path(config) + '/' + 'betas_adj' + suffix + '.npz'

    if os.path.isfile(fn_dict) and os.path.isfile(fn_data):
        # NOTE(review): pickle is only safe for trusted, locally produced
        # cache files.
        with open(fn_dict, 'rb') as f:
            config.betas_adj_dict = pickle.load(f)

        # Close the archive once the array has been read into memory.
        with np.load(fn_data) as data:
            config.betas_adj_data = data['data']
        return

    # Cache miss: the betas are loaded with empty data_params.
    config.experiment.data_params = {}
    load_betas(config)

    config.betas_adj_dict = config.betas_dict
    with open(fn_dict, 'wb') as f:
        pickle.dump(config.betas_adj_dict, f, pickle.HIGHEST_PROTOCOL)

    exog_dict = {}

    if 'cells' in data_params:
        cells_dict = load_cells_dict(config)
        requested_cells = data_params['cells']

        if isinstance(requested_cells, list):
            # Drop every cell type that was not requested (in place, as
            # before).
            for key in list(cells_dict.keys()):
                if key not in requested_cells:
                    cells_dict.pop(key)

            # A shorter result means a requested type is missing.
            if len(cells_dict) != len(requested_cells):
                raise ValueError('Wrong number of cells types.')

            exog_dict.update(cells_dict)

    if 'observables' in data_params:
        observables_dict = load_observables_dict(config)
        requested_observables = data_params['observables']

        if isinstance(requested_observables, list):
            for key in list(observables_dict.keys()):
                if key not in requested_observables:
                    observables_dict.pop(key)

            if len(observables_dict) != len(requested_observables):
                raise ValueError('Wrong number of observables types.')

            exog_dict.update(observables_dict)

    exog_df = pd.DataFrame(exog_dict)

    num_cpgs = config.betas_data.shape[0]
    num_subjects = config.betas_data.shape[1]
    config.betas_adj_data = np.zeros((num_cpgs, num_subjects),
                                     dtype=np.float32)

    for cpg, row in tqdm(config.betas_dict.items(),
                         mininterval=60.0,
                         desc='betas_adj_data creating'):
        betas = config.betas_data[row, :]
        mean = np.mean(betas)

        endog_df = pd.DataFrame({cpg: betas})
        reg_res = sm.OLS(endog=endog_df, exog=exog_df).fit()

        # Adjusted value = regression residual shifted back by the row mean.
        # An explicit float32 array replaces the former "list + scalar"
        # expression, which only worked through NumPy's reflected add.
        residuals = np.asarray(reg_res.resid, dtype=np.float32)
        config.betas_adj_data[row] = residuals + mean

    np.savez_compressed(fn_data, data=config.betas_adj_data)

    # Clear data
    del config.betas_data
示例#5
0
def load_resid_old(config):
    """Load cached "resid_old" residuals, or compute and cache them.

    On a cache hit ``config.resid_old_dict``, ``config.resid_old_missed_dict``
    and ``config.resid_old_data`` are read from disk. Otherwise the betas are
    loaded through ``load_betas`` (keeping only the ``'norm'`` entry of the
    data params), each row is regressed (``sm.OLS``) on the exogenous
    variables selected by the original data params using only the subjects
    that are not listed as missed for that row, and the residuals are stored.
    Missed subjects get ``NaN``. The result is written to the cache files and
    ``config.betas_data`` is deleted.

    Args:
        config: project configuration object. Reads
            ``config.experiment.data_params`` and, on a cache miss,
            ``config.betas_dict``, ``config.betas_missed_dict`` and
            ``config.betas_data``.

    Raises:
        ValueError: if ``config.experiment.data_params`` is empty, or if a
            requested cell / observable type is not available.

    NOTE(review): on a cache miss ``config.experiment.data_params`` is left
    reduced to the ``'norm'`` key when the function returns. Confirm whether
    callers rely on this before changing it.
    """
    suffix = ''
    if bool(config.experiment.data_params):
        # Keep a reference to the original params: the attribute is rebound
        # below, before the exog variables are selected.
        data_params = config.experiment.data_params
        suffix += '_' + config.experiment.get_data_params_str()
    else:
        raise ValueError('Exog for resid_old is empty.')

    fn_dict = get_data_base_path(
        config) + '/' + 'resid_old_dict' + suffix + '.pkl'
    fn_missed_dict = get_data_base_path(
        config) + '/' + 'resid_old_missed_dict' + suffix + '.pkl'
    fn_data = get_data_base_path(config) + '/' + 'resid_old' + suffix + '.npz'

    # All three cache files are read below, so all three must exist;
    # otherwise fall through and rebuild the cache instead of failing on a
    # missing missed-dict file.
    cache_files = (fn_dict, fn_missed_dict, fn_data)
    if all(os.path.isfile(fn) for fn in cache_files):
        # NOTE(review): pickle is only safe for trusted, locally produced
        # cache files.
        with open(fn_dict, 'rb') as f:
            config.resid_old_dict = pickle.load(f)

        with open(fn_missed_dict, 'rb') as f:
            config.resid_old_missed_dict = pickle.load(f)

        # Close the archive once the array has been read into memory.
        with np.load(fn_data) as data:
            config.resid_old_data = data['data']
        return

    # Cache miss: load the betas with only the shared keys of the params.
    data_params_copy = copy.deepcopy(config.experiment.data_params)
    common_keys = ['norm']
    config.experiment.data_params = {
        key: data_params_copy[key]
        for key in common_keys
        if key in data_params_copy
    }

    load_betas(config)

    config.resid_old_dict = config.betas_dict
    with open(fn_dict, 'wb') as f:
        pickle.dump(config.resid_old_dict, f, pickle.HIGHEST_PROTOCOL)

    config.resid_old_missed_dict = config.betas_missed_dict
    with open(fn_missed_dict, 'wb') as f:
        pickle.dump(config.resid_old_missed_dict, f, pickle.HIGHEST_PROTOCOL)

    exog_dict = {}

    if 'cells' in data_params:
        cells_dict = load_cells_dict(config)
        requested_cells = data_params['cells']

        if isinstance(requested_cells, list):
            # Drop every cell type that was not requested (in place, as
            # before).
            for key in list(cells_dict.keys()):
                if key not in requested_cells:
                    cells_dict.pop(key)

            # A shorter result means a requested type is missing.
            if len(cells_dict) != len(requested_cells):
                raise ValueError('Wrong number of cells types.')

            exog_dict.update(cells_dict)

    if 'observables' in data_params:
        observables_categorical_dict = load_observables_categorical_dict(
            config)
        requested_observables = data_params['observables']

        if isinstance(requested_observables, list):
            for key in list(observables_categorical_dict.keys()):
                if key not in requested_observables:
                    observables_categorical_dict.pop(key)

            if len(observables_categorical_dict) != len(
                    requested_observables):
                raise ValueError('Wrong number of observables types.')

            exog_dict.update(observables_categorical_dict)

    num_cpgs = config.betas_data.shape[0]
    num_subjects = config.betas_data.shape[1]
    config.resid_old_data = np.zeros((num_cpgs, num_subjects),
                                     dtype=np.float32)

    for cpg, row in tqdm(config.betas_dict.items(),
                         mininterval=60.0,
                         desc='resid_old_data creating'):
        raw_betas = config.betas_data[row, :]
        missed_ids = config.betas_missed_dict[cpg]

        if len(missed_ids) > 0:
            # Build the set once so the membership tests below are O(1).
            missed = set(missed_ids)

            # Keep only the exog values of subjects that are not missed.
            current_exog_dict = {
                key: [values[value_id]
                      for value_id in range(len(values))
                      if value_id not in missed]
                for key, values in exog_dict.items()
            }

            passed_ids = [beta_id
                          for beta_id in range(len(raw_betas))
                          if beta_id not in missed]
            betas = [raw_betas[beta_id] for beta_id in passed_ids]
        else:
            # Nothing missed: exog_dict is only read, so no copy is needed.
            current_exog_dict = exog_dict
            betas = raw_betas
            passed_ids = list(range(len(betas)))

        endog_df = pd.DataFrame({cpg: betas})
        exog_df = pd.DataFrame(current_exog_dict)

        reg_res = sm.OLS(endog=endog_df, exog=exog_df).fit()

        # Scatter the residuals back to their original subject positions.
        resid_old_raw = np.zeros(num_subjects, dtype=np.float32)
        resid_old_raw[np.asarray(passed_ids, dtype=np.intp)] = np.asarray(
            reg_res.resid, dtype=np.float32)

        # Subjects without a beta value get NaN instead of a residual.
        for missed_id in config.resid_old_missed_dict[cpg]:
            resid_old_raw[missed_id] = np.float32('nan')

        config.resid_old_data[row] = resid_old_raw

    np.savez_compressed(fn_data, data=config.resid_old_data)

    # Clear data
    del config.betas_data