Exemplo n.º 1
0
def run_dmr_analyses(data, comparisons, anno, dmr_params, verbose=True):
    """
    Compute DMRs for each of the supplied two-group comparisons.

    Probe clusters are identified once from the annotation; every comparison is then tested on its own copy of
    the clustered results object.
    :param data: Pandas dataframe containing M values, columns are samples and rows are probes.
    :param comparisons: Dictionary, each key is a title, each value is a 2-element iterable containing lists of
    sample names. The comparison is run as group 1 - group 2 in each case.
    :param anno: Probe annotation, passed to dmr.DmrResults.
    :param dmr_params: Dictionary of DMR parameters. It is unpacked into identify_clusters() and must also contain
    the keys 'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs'.
    :param verbose: If True (default), log the title and the members of both groups for every comparison.
    :return: dmr.DmrResultCollection holding one tested results object per comparison title.
    """
    dmr_res_obj = dmr.DmrResults(anno=anno)
    dmr_res_obj.identify_clusters(**dmr_params)
    dmr_res = {}

    for the_ttl, the_samples in comparisons.items():
        # Honour the verbose flag: previously the parameter was accepted but had no effect.
        if verbose:
            logger.info(
                "Comparison %s. Group 1: %s. Group 2: %s.",
                the_ttl,
                ','.join(the_samples[0]),
                ','.join(the_samples[1]),
            )
        # Work on a copy so that the shared cluster definitions are not modified by the test results.
        the_obj = dmr_res_obj.copy()
        the_obj.test_clusters(
            data,
            samples=the_samples,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        dmr_res[the_ttl] = the_obj

    # Titles become keyword names here, so they are expected to be strings.
    return dmr.DmrResultCollection(**dmr_res)
Exemplo n.º 2
0
def compute_dmr_clusters(anno, dmr_params):
    """
    Identify probe clusters chromosome by chromosome and wrap them in a DmrResults object.
    :param anno: Annotation table; the 'CHR' and 'MAPINFO' columns are read.
    :param dmr_params: Dictionary; the 'n_min' and 'd_max' entries are forwarded to dmr.identify_cluster.
    :return: dmr.DmrResults built from all clusters found and the annotation.
    """
    probe_clusters = []

    for chrom in anno.CHR.unique():
        # Sorted MAPINFO values of the probes annotated to this chromosome
        positions = anno.loc[anno.CHR == chrom, 'MAPINFO'].sort_values()
        found = dmr.identify_cluster(positions, dmr_params['n_min'], dmr_params['d_max'])

        for members in found:
            # Cluster IDs run consecutively across chromosomes: the next ID equals the number collected so far.
            probe_clusters.append(
                dmr.ProbeCluster(members, anno, cluster_id=len(probe_clusters), chr=chrom)
            )

    return dmr.DmrResults(clusters=probe_clusters, anno=anno)
Exemplo n.º 3
0
def compute_cross_dmr(me_data, me_meta, anno, pids, dmr_params, external_references=(('GIBCO', 'NSC'),)):
    """
    Compute DMRs for every GBM group against every iNSC group and against each external reference group.

    Each comparison is always supplied to test_clusters() as [GBM samples, comparator samples], matching the
    explicit ordering used by paired_dmr.
    :param me_data: Methylation data, passed unchanged to test_clusters().
    :param me_meta: Metadata table indexed by sample name, with a 'type' column (values such as 'GBM', 'iNSC').
    :param anno: Probe annotation, passed to dmr.DmrResults.
    :param pids: Iterable of patient ID strings. Samples are assigned to a patient when the sample name matches the
    ID via str.contains().
    :param dmr_params: Dictionary of DMR parameters. It is unpacked into identify_clusters() and must also contain
    the keys 'n_jobs', 'delta_m_min', 'dmr_test_method' and 'test_kwargs'.
    :param external_references: Iterable of (name pattern, type) pairs defining the external reference groups.
    :return: dmr.DmrResultCollection built from a nested dictionary: the first level is keyed by the GBM patient ID,
    the second level by the comparator (patient ID or external reference name).
    """
    obj = dmr.DmrResults(anno=anno)
    obj.identify_clusters(**dmr_params)

    sample_names = me_meta.index
    sample_types = me_meta.loc[:, 'type']

    def _test_gbm_against(gbm_idx, pattern, comparator_type):
        # Run one comparison of the given GBM samples against the samples matching pattern and comparator_type.
        comparator_idx = sample_names.str.contains(pattern) & (sample_types == comparator_type)
        # Explicit two-element list: fixes the comparison direction rather than relying on the order of a
        # groupby() result, and gives the callee an indexable sequence.
        the_samples = [
            sample_names[gbm_idx],
            sample_names[comparator_idx],
        ]
        the_obj = obj.copy()
        # NOTE(review): unlike paired_dmr, 'alpha' is not forwarded here - confirm whether that is intended.
        the_obj.test_clusters(
            me_data,
            samples=the_samples,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            **dmr_params['test_kwargs']
        )
        return the_obj

    res = {}

    # loop over GBM groups
    for pid1 in pids:
        this_res = res.setdefault(pid1, {})
        gbm_idx = sample_names.str.contains(pid1) & (sample_types == 'GBM')

        # loop over iNSC groups
        for pid2 in pids:
            this_res[pid2] = _test_gbm_against(gbm_idx, pid2, 'iNSC')

        # loop over external reference NSC groups
        for er, er_type in external_references:
            this_res[er] = _test_gbm_against(gbm_idx, er, er_type)

    return dmr.DmrResultCollection(**res)
Exemplo n.º 4
0
def paired_dmr(me_data, me_meta, anno, pids, dmr_params):
    """
    Run one GBM vs iNSC DMR comparison per patient (groups are supplied in that order).
    :param me_data: Methylation data, passed unchanged to test_clusters().
    :param me_meta: Metadata table indexed by sample name, with a 'type' column.
    :param anno: Probe annotation, passed to dmr.DmrResults.
    :param pids: Iterable of patient ID strings, matched against the sample names with str.contains().
    :param dmr_params: Dictionary of DMR parameters. It is unpacked into identify_clusters() and must also contain
    the keys 'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs'.
    :return: dmr.DmrResultCollection holding one tested results object per patient ID.
    """
    template = dmr.DmrResults(anno=anno)
    template.identify_clusters(**dmr_params)

    def _samples_of_type(name_mask, type_name):
        # Sample names selected by name_mask whose 'type' entry equals type_name
        return me_meta.index[name_mask & (me_meta.loc[:, 'type'] == type_name)]

    per_patient = {}

    for pid in pids:
        in_patient = me_meta.index.str.contains(pid)
        # The list is built by hand so that GBM is always the first group and iNSC the second.
        groups = [
            _samples_of_type(in_patient, 'GBM'),
            _samples_of_type(in_patient, 'iNSC'),
        ]

        result = template.copy()
        result.test_clusters(
            me_data,
            samples=groups,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        per_patient[pid] = result

    return dmr.DmrResultCollection(**per_patient)
Exemplo n.º 5
0
              facecolor='w',
              framealpha=0.5)

    ax.set_ylim([-0.01, 1.01])
    ax.set_xlabel("M value")
    ax.set_ylabel("ECDF")

    fig.tight_layout()
    fig.savefig(os.path.join(outdir, "methylation_ecdf.png"), dpi=200)

    ## TODO: linear interp along y axis to identify regions that could be found DMR due to norming differences?
    ## TODO: apply this to the hGIC project

    # 1) DMR: All shBMI1 vs all scramble, etc... (aggregating cell lines)

    dmr_res_obj = dmr.DmrResults(anno=anno)
    dmr_res_obj.identify_clusters(**dmr_params)

    comparisons = [
        collections.OrderedDict([
            ('shCHD7', ['3021_1_shC', 'C', 'ICb1299_shCHD7', 'p62_3_shChd7']),
            ('scramble', ['3021_1_Scr', 'S', 'ICb1299_Scr', 'p62_3_Scr'])
        ]),
        collections.OrderedDict([
            ('shBMI1', ['3021_1_shB', 'B', 'ICb1299_shBMI1', 'p62_3_shBmi1']),
            ('scramble', ['3021_1_Scr', 'S', 'ICb1299_Scr', 'p62_3_Scr'])
        ]),
        collections.OrderedDict([
            ('shCHD7shBMI1',
             ['3021_1_shB+C', 'B+C', 'ICb1299_shBMI1CHD7', 'p62_3_shB+C']),
            ('scramble', ['3021_1_Scr', 'S', 'ICb1299_Scr', 'p62_3_Scr'])
Exemplo n.º 6
0
    mdat_019_e = mdat['GBM019_P4']
    mdat_019_l = mdat['GBM019Luc_P12']
    mdat_019_exvivo = mdat.loc[:, ['GBM019Luc_P3_PDX1', 'GBM019Luc_P2_PDX2']]

    e_minus_l = pd.Series(mdat_019_e.values - mdat_019_l.values,
                          index=mdat.index)
    exvivo_minus_l = mdat_019_exvivo.subtract(mdat_019_l.values, axis=0)

    raise StopIteration

    # DMR analysis
    dmr_params = consts.DMR_PARAMS
    dmr_params['n_jobs'] = mp.cpu_count()
    anno = loader.load_illumina_methylationepic_annotation()
    anno = anno.loc[mdat.index]
    ffpe_gic_dmrs = dmr.DmrResults(anno=anno)
    ffpe_gic_dmrs.identify_clusters(**dmr_params)

    # use only GIC and FFPE here
    this_mdat = mdat.loc[:,
                         obj.meta.descriptor.isin(['In vitro GIC', 'Bulk GBM']
                                                  )]
    this_samples = this_mdat.columns.groupby(obj.meta.loc[this_mdat.columns,
                                                          'descriptor'])
    samples = [
        this_samples['In vitro GIC'],
        this_samples['Bulk GBM'],
    ]
    ffpe_gic_dmrs.test_clusters(this_mdat,
                                samples=samples,
                                n_jobs=dmr_params['n_jobs'],