dmr_hash_dict)
    # Build the path of the cached paired-comparison DMR results; the file
    # name embeds the integer held in `the_hash`.
    filename = 'dmr_results_paired_comparison.%d.pkl' % the_hash
    fn = os.path.join(DMR_LOAD_DIR, filename)

    # Nothing is recomputed here: a missing cache file is a hard failure.
    if not os.path.isfile(fn):
        raise Exception("Unable to locate pre-existing results.")

    logger.info("Loading pre-computed DMR results from %s", fn)
    dmr_res_s1 = dmr.DmrResultCollection.from_pickle(fn, anno=anno)

    # load DE results
    # RNA-Seq data are loaded per patient (include_control=False) and then
    # restricted to the sample names in consts.S1_RNASEQ_SAMPLES.
    rnaseq_obj = rnaseq_loader.load_by_patient(
        consts.PIDS,
        include_control=False
    )
    rnaseq_obj.filter_by_sample_name(consts.S1_RNASEQ_SAMPLES)

    # The cache file name embeds the hash returned by tsgd.de_results_hash
    # for the retained sample names and the DE parameters.
    rnaseq_sample_names = rnaseq_obj.meta.index.tolist()
    the_hash = tsgd.de_results_hash(rnaseq_sample_names, de_params)
    filename = 'de_results_paired_comparison.%d.pkl' % the_hash
    fn = os.path.join(DE_LOAD_DIR, filename)

    # DE is never run here: the pickled results must already exist.
    if not os.path.isfile(fn):
        raise Exception("Unable to locate pre-existing DE results.")

    logger.info("Reading S1 DE results from %s", fn)
    # NOTE: unpickling is only appropriate for trusted, locally produced files.
    with open(fn, 'rb') as f:
        de_res_s1 = pickle.load(f)

    # Path of the cached DMR results: the file name embeds the hash returned
    # by tsgd.dmr_results_hash for the index of meth_obj.meta and the DMR
    # parameter dict.
    meth_sample_names = meth_obj.meta.index.tolist()
    the_hash = tsgd.dmr_results_hash(meth_sample_names, dmr_hash_dict)
    filename = 'dmr_results_paired_comparison.%d.pkl' % the_hash
    fn = os.path.join(DMR_LOAD_DIR, filename)
Exemplo n.º 2
0
    rna_cc_obj.meta.index.isin(consts.S1_RNASEQ_SAMPLES_INSC))
# FIXME: this is a bug in the loader?
# Copy the `batch` metadata column onto the `batch_id` attribute of each
# loader object before the two are combined.
for _single_batch_obj in (rna_ff_obj, rna_cc_obj):
    _single_batch_obj.batch_id = _single_batch_obj.meta.batch

# store the sample list now for hash dict
# (rna_ff_obj names first, then rna_cc_obj names)
ff_sample_names = rna_ff_obj.meta.index.tolist()
cc_sample_names = rna_cc_obj.meta.index.tolist()
rna_sample_names = ff_sample_names + cc_sample_names

# combine
batch_objs = [rna_cc_obj, rna_ff_obj]
rna_obj = rnaseq_loader.loader.MultipleBatchLoader(batch_objs)

# a few of the type entries in meta are missing: fill them with 'GBM'
filled_type = rna_obj.meta['type'].fillna('GBM')
rna_obj.meta['type'] = filled_type

# load or run DE
# The cache file name embeds the hash returned by tsgd.de_results_hash for the
# stored sample names and the DE parameters.
the_hash = tsgd.de_results_hash(rna_sample_names, de_params)
filename = 'de_results_paired_comparison.%d.pkl' % the_hash
fn = os.path.join(DE_LOAD_DIR, filename)

if os.path.isfile(fn):
    # Cached results exist: read them instead of recomputing.
    # NOTE: unpickling is only appropriate for trusted, locally produced files.
    logger.info("Reading S1 DE results from %s", fn)
    with open(fn, 'rb') as f:
        de_res_full_s1 = pickle.load(f)
else:
    # No cached results: label every sample with its comparison group.
    # dtype=object because the labels are strings. Without an explicit dtype
    # the series starts out as float64 NaN, which triggers deprecation
    # warnings in newer pandas (dtype-less empty Series, and assignment of
    # values with an incompatible dtype).
    groups_s1 = pd.Series(index=rna_obj.meta.index, dtype=object)
    comparisons_s1 = {}
    for pid in pids:
        # GBM samples are matched on the patient_id metadata column (missing
        # IDs are compared as '') together with type == 'GBM'.
        groups_s1[(rna_obj.meta.patient_id.fillna('') == pid)
                  & (rna_obj.meta['type'] == 'GBM')] = "GBM%s" % pid
        # iNSC samples are matched on the sample name containing both 'NSC'
        # and the patient ID.
        groups_s1[groups_s1.index.str.contains('NSC')
                  & groups_s1.index.str.contains(pid)] = "iNSC%s" % pid
Exemplo n.º 3
0
import statsmodels.formula.api as sm

logger = log.get_console_logger()

if __name__ == '__main__':
    # Output directory comes from output.unique_output_dir(); cached
    # intermediates are read from the 'de' and 'dmr' subdirectories of
    # INTERMEDIATE_DIR.
    outdir = output.unique_output_dir()
    DE_LOAD_DIR = os.path.join(INTERMEDIATE_DIR, 'de')
    DMR_LOAD_DIR = os.path.join(INTERMEDIATE_DIR, 'dmr')

    # RNA-Seq data for all patients, restricted to the S1 sample names.
    rnaseq_obj = loader.load_by_patient(consts.PIDS)
    rnaseq_obj.filter_by_sample_name(consts.S1_RNASEQ_SAMPLES)

    # Metadata rows are taken in the order of the data columns.
    dat_s1 = rnaseq_obj.data
    data_columns = dat_s1.columns
    meta_s1 = rnaseq_obj.meta.loc[data_columns]

    # The cache file name embeds the hash returned by tsgd.de_results_hash
    # for the sample names and consts.DE_PARAMS.
    s1_sample_names = meta_s1.index.tolist()
    the_hash = tsgd.de_results_hash(s1_sample_names, consts.DE_PARAMS)
    filename = 'de_results_paired_comparison.%d.pkl' % the_hash
    fn = os.path.join(DE_LOAD_DIR, filename)

    # This script does not run DE itself: a precomputed file is mandatory.
    if not os.path.isfile(fn):
        raise NotImplementedError(
            "We require a precomputed DE results file (expected this at %s)" %
            fn)

    logger.info("Reading S1 DE results from %s", fn)
    # NOTE: unpickling is only appropriate for trusted, locally produced files.
    with open(fn, 'rb') as f:
        de_res_full_s1 = pickle.load(f)

    # Result tables live in <INTERMEDIATE_DIR>/assess_reprog_alt1/results.
    basedir = os.path.join(INTERMEDIATE_DIR, "assess_reprog_alt1")
    indir = os.path.join(basedir, "results")
    # Collect the base names of the CSV files in that directory. Match on the
    # file extension only: a substring test would also pick up names such as
    # 'x.csv.bak', and str.replace would strip '.csv' from the middle of a
    # name.
    names = [
        os.path.splitext(t)[0]
        for t in os.listdir(indir)
        if t.endswith('.csv')
    ]
Exemplo n.º 4
0
    #######################################################
    # DE
    #######################################################
    # data: loaded per patient with source='star' and include_control=False,
    # then restricted to the S1 sample names
    rnaseq_obj = rnaseq_loader.load_by_patient(
        pids,
        source='star',
        include_control=False
    )
    rnaseq_obj.filter_by_sample_name(consts.S1_RNASEQ_SAMPLES)

    meta_s1 = rnaseq_obj.meta
    dat_s1 = rnaseq_obj.data

    # Scale every column by its own total and multiply by one million.
    column_totals = dat_s1.sum(axis=0)
    cpm = dat_s1.divide(column_totals, axis=1) * 1e6

    # DE results
    # The cache file name embeds the hash returned by tsgd.de_results_hash
    # for the sample names and the DE parameters.
    s1_sample_names = meta_s1.index.tolist()
    the_hash = tsgd.de_results_hash(s1_sample_names, de_params)
    filename = 'de_results_paired_comparison.%d.pkl' % the_hash
    fn = os.path.join(DE_LOAD_DIR, filename)

    # DE is not recomputed here: a missing cache file is a hard failure.
    if not os.path.isfile(fn):
        raise AttributeError(
            "Unable to find pre-computed S1 comparison results.")

    logger.info("Reading S1 DE results from %s", fn)
    # NOTE: unpickling is only appropriate for trusted, locally produced files.
    with open(fn, 'rb') as f:
        de_res_full_s1 = pickle.load(f)

    # Keep, for every entry of the full DE results, only the rows whose FDR is
    # below the configured threshold. A dict comprehension avoids building an
    # intermediate list of (key, value) tuples.
    de_res_s1 = {
        k: v.loc[v.FDR < de_params['fdr']]
        for k, v in de_res_full_s1.items()
    }
    # Venn sets/counts over the filtered result indexes, one per patient ID
    # and in the order given by `pids`.
    vs, vc = setops.venn_from_arrays(*[de_res_s1[pid].index for pid in pids])
    de_res_wide = setops.venn_set_to_wide_dataframe(
    # FFPE data: loaded per patient with source='salmon' and type='ffpe', then
    # restricted to the sample names in consts.FFPE_RNASEQ_SAMPLES.
    obj_ff = loader.load_by_patient(pids, source='salmon', type='ffpe')
    obj_ff.filter_by_sample_name(consts.FFPE_RNASEQ_SAMPLES)

    # add PID to FFPE metadata
    # Strip everything from an optional underscore followed by 'DEF' or 'SP'
    # to the end of the sample name. The pattern is a regular expression, so
    # regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a
    # literal string by default, which would leave the names unchanged and
    # make the map lookup below fail.
    nh_id = obj_ff.meta.index.str.replace(r'(_?)(DEF|SP).*', '', regex=True)
    # The lookup table is keyed on the hyphenated form of the ID.
    p_id = [NH_ID_TO_PATIENT_ID_MAP[t.replace('_', '-')] for t in nh_id]
    # Both columns are inserted at position 0, so 'patient_id' ends up first
    # and 'nh_id' second.
    obj_ff.meta.insert(0, 'nh_id', nh_id)
    obj_ff.meta.insert(0, 'patient_id', p_id)

    # pull out logged TPM
    # (`eps` is added to the values before taking log2)
    shifted_cc = obj_cc.data + eps
    log2_tpm_cc = np.log2(shifted_cc)
    shifted_ff = obj_ff.data + eps
    log2_tpm_ff = np.log2(shifted_ff)

    # load DE results
    # The cache file name embeds the hash returned by tsgd.de_results_hash
    # for the obj_cc sample names and consts.DE_PARAMS.
    cc_sample_names = obj_cc.meta.index.tolist()
    the_hash = tsgd.de_results_hash(cc_sample_names, consts.DE_PARAMS)
    filename = 'de_results_paired_comparison.%d.pkl' % the_hash
    fn = os.path.join(DE_LOAD_DIR, filename)

    # DE is not recomputed here: a missing cache file is a hard failure.
    if not os.path.isfile(fn):
        raise IOError("No pre-computed results file found: %s" % fn)

    # NOTE: unpickling is only appropriate for trusted, locally produced files.
    with open(fn, 'rb') as f:
        de_res_full_s1 = pickle.load(f)

    # load genes in GAG-related IPA pathways
    pathways = [
        'Chondroitin Sulfate Biosynthesis',
        'Dermatan Sulfate Biosynthesis',
        'Dermatan Sulfate Biosynthesis (Late Stages)',
        'Chondroitin Sulfate Biosynthesis (Late Stages)',
Exemplo n.º 6
0
# load RNA-Seq data
rna_obj = rnaseq_loader.load_by_patient(
    pids,
    source='star',
    type='cell_culture',
    include_control=False
)

# keep only the samples named in the iAPC and GIC S1 sample lists
s1_keep_names = (consts.S1_RNASEQ_SAMPLES_IAPC +
                 consts.S1_RNASEQ_SAMPLES_GIC)
keep_mask = rna_obj.meta.index.isin(s1_keep_names)
rna_obj.filter_samples(keep_mask)

# FIXME: this is a bug in the loader?
# Copy the `batch` metadata column onto the loader's `batch_id` attribute.
rna_obj.batch_id = rna_obj.meta.batch

# load or run DE
# The cache file name embeds the hash returned by tsgd.de_results_hash.
# NOTE(review): the pandas Index is passed as-is (not converted to a list);
# confirm that de_results_hash accepts it.
sample_index = rna_obj.meta.index
the_hash = tsgd.de_results_hash(sample_index, de_params)
filename = 'de_results_paired_comparison.%d.pkl' % the_hash
fn = os.path.join(DE_LOAD_DIR, filename)

if os.path.isfile(fn):
    logger.info("Reading S1 DE results from %s", fn)
    with open(fn, 'rb') as f:
        de_res_full_s1 = pickle.load(f)
else:
    groups_s1 = pd.Series(index=rna_obj.meta.index)
    comparisons_s1 = {}
    for pid in pids:
        groups_s1[rna_obj.meta.index.str.contains(pid)
                  & (rna_obj.meta['type'] == 'GBM')] = "GBM%s" % pid
        groups_s1[groups_s1.index.str.contains(pid)
                  & (rna_obj.meta['type'] == 'iAPC')] = "iAPC%s" % pid