Пример #1
0
def run_dmr_analyses(data, comparisons, anno, dmr_params, verbose=True):
    """
    Test DMR clusters once per named comparison and collect the results.

    Clusters are identified a single time from ``anno``; every comparison then runs its test on its own copy of
    that clustered object, so the comparisons do not share test results.
    :param data: Methylation data handed to ``test_clusters``. Described upstream as a pandas dataframe of M values
    (columns are samples, rows are probes).
    :param comparisons: Dictionary, each key is a title, each value is a 2-element indexable containing lists of
    sample names. The comparison is run as group 1 - group 2 in each case.
    :param anno: Annotation passed to ``dmr.DmrResults``.
    :param dmr_params: Dictionary. Forwarded in full as keyword arguments to ``identify_clusters``; the keys
    'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs' are read for ``test_clusters``.
    :param verbose: Accepted for interface compatibility; not read by this function.
    :return: ``dmr.DmrResultCollection`` built with one keyword entry per comparison title.
    """
    base_clusters = dmr.DmrResults(anno=anno)
    base_clusters.identify_clusters(**dmr_params)

    results_by_title = {}
    for title, groups in comparisons.items():
        group_1_label = ','.join(groups[0])
        group_2_label = ','.join(groups[1])
        logger.info("Comparison %s. Group 1: %s. Group 2: %s.", title, group_1_label, group_2_label)

        # test on a copy so the shared cluster definition is left untouched
        tested = base_clusters.copy()
        tested.test_clusters(
            data,
            samples=groups,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        results_by_title[title] = tested

    return dmr.DmrResultCollection(**results_by_title)
def run_dmr(me_meta, me_data, dmr_clusters, str_contains_dict, type1='iPSC', type2='FB', **dmr_params):
    """
    Run one DMR test per entry of ``str_contains_dict``, each as type1 - type2.

    For every entry, samples are selected from ``me_meta`` by matching the index against a string AND requiring the
    `type` column to equal the corresponding type; the test runs on a copy of ``dmr_clusters``.
    :param me_meta: Metadata table; its index holds sample names and it has a `type` column.
    :param me_data: Methylation data handed to ``test_clusters``.
    :param dmr_clusters: Object providing ``copy()`` and ``test_clusters()``; copied once per comparison.
    :param str_contains_dict: Dictionary. Keys will be used to identify results. Values are 2-tuples with the strings
    used in the meta.index.str.contains() call that separates samples (first string pairs with type1, second
    with type2).
    :param type1: Refers to the `type` column in meta; first group of the comparison.
    :param type2: Refers to the `type` column in meta; second group of the comparison.
    :param dmr_params: Keyword arguments; 'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs'
    are read.
    :return: ``dmr.DmrResultCollection`` built with one keyword entry per key of ``str_contains_dict``.
    """
    results = {}

    for label, patterns in str_contains_dict.items():
        pattern_1, pattern_2 = patterns

        in_group_1 = me_meta.index.str.contains(pattern_1) & (me_meta.loc[:, 'type'] == type1)
        in_group_2 = me_meta.index.str.contains(pattern_2) & (me_meta.loc[:, 'type'] == type2)
        selected = in_group_1 | in_group_2

        # split the selected sample names by their type, then order them explicitly as [type1, type2]
        selected_types = me_meta.loc[selected, 'type'].values
        names_by_type = me_meta.index[selected].groupby(selected_types)
        ordered_samples = [names_by_type[type1], names_by_type[type2]]

        tested = dmr_clusters.copy()
        tested.test_clusters(
            me_data,
            samples=ordered_samples,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        results[label] = tested

    return dmr.DmrResultCollection(**results)
Пример #3
0
def compute_cross_dmr(me_data, me_meta, anno, pids, dmr_params, external_references=(('GIBCO', 'NSC'),)):
    """
    Test DMR clusters for every GBM group against every iNSC group and every external reference.

    Clusters are identified once from ``anno``. For each patient ID, the GBM samples of that patient form the first
    group; the second group is, in turn, the iNSC samples of each patient ID and then each external reference.
    Each test runs on its own copy of the clustered object.
    :param me_data: Methylation data handed to ``test_clusters``.
    :param me_meta: Metadata table; its index holds sample names and it has a `type` column.
    :param anno: Annotation passed to ``dmr.DmrResults``.
    :param pids: Iterable of strings matched against the sample names with ``str.contains``.
    :param dmr_params: Dictionary. Forwarded in full as keyword arguments to ``identify_clusters``; the keys
    'n_jobs', 'delta_m_min', 'dmr_test_method' and 'test_kwargs' are read for ``test_clusters``.
    :param external_references: Iterable of (name substring, `type` value) pairs selecting reference samples.
    :return: ``dmr.DmrResultCollection`` built from a nested mapping: GBM pid -> (iNSC pid or reference name) ->
    tested object.
    """
    obj = dmr.DmrResults(anno=anno)
    obj.identify_clusters(**dmr_params)
    res = {}

    def _test_against(gbm_idx, ref_idx):
        # Build the two groups explicitly, GBM first. Grouping the union by `type` and taking the dict values
        # leaves the group order to dict ordering and merges the groups if both share a `type` value.
        the_samples = [
            me_meta.index[gbm_idx],
            me_meta.index[ref_idx],
        ]
        the_obj = obj.copy()
        # NOTE(review): dmr_params['alpha'] is not forwarded here, so whatever default test_clusters has applies
        # - confirm this is intended.
        the_obj.test_clusters(me_data,
                              samples=the_samples,
                              n_jobs=dmr_params['n_jobs'],
                              min_median_change=dmr_params['delta_m_min'],
                              method=dmr_params['dmr_test_method'],
                              **dmr_params['test_kwargs']
                              )
        return the_obj

    # loop over GBM groups
    for pid1 in pids:
        res.setdefault(pid1, {})
        the_idx1 = me_meta.index.str.contains(pid1) & (me_meta.loc[:, 'type'] == 'GBM')

        # loop over iNSC groups
        for pid2 in pids:
            the_idx2 = me_meta.index.str.contains(pid2) & (me_meta.loc[:, 'type'] == 'iNSC')
            res[pid1][pid2] = _test_against(the_idx1, the_idx2)

        # loop over external reference NSC groups
        for er, er_type in external_references:
            the_idx2 = me_meta.index.str.contains(er) & (me_meta.loc[:, 'type'] == er_type)
            res[pid1][er] = _test_against(the_idx1, the_idx2)

    return dmr.DmrResultCollection(**res)
Пример #4
0
def paired_dmr(me_data, me_meta, anno, pids, dmr_params):
    """
    Compute DMRs for paired GBM-iNSC comparisons (defined in that order) for all patients.

    Clusters are identified once from ``anno``; each patient's comparison is then tested on its own copy.
    :param me_data: Methylation data handed to ``test_clusters``.
    :param me_meta: Metadata table; its index holds sample names and it has a `type` column.
    :param anno: Annotation passed to ``dmr.DmrResults``.
    :param pids: Iterable of strings matched against the sample names with ``str.contains``; also used as the
    result keys.
    :param dmr_params: Dictionary. Forwarded in full as keyword arguments to ``identify_clusters``; the keys
    'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs' are read for ``test_clusters``.
    :return: ``dmr.DmrResultCollection`` built with one keyword entry per patient ID.
    """
    cluster_template = dmr.DmrResults(anno=anno)
    cluster_template.identify_clusters(**dmr_params)

    per_patient = {}
    for pid in pids:
        matches_pid = me_meta.index.str.contains(pid)
        sample_type = me_meta.loc[:, 'type']
        is_gbm = matches_pid & (sample_type == 'GBM')
        is_insc = matches_pid & (sample_type == 'iNSC')

        # The groups are listed explicitly (rather than grouping the samples by type) so that the comparison
        # order is always GBM first, iNSC second.
        groups = [me_meta.index[is_gbm], me_meta.index[is_insc]]

        tested = cluster_template.copy()
        tested.test_clusters(
            me_data,
            samples=groups,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        per_patient[pid] = tested

    return dmr.DmrResultCollection(**per_patient)