Exemplo n.º 1
0
def test_wr_methods(log=sys.stdout):
    """Write one subset of GOEA result fields through two routes and compare the files.

    The same results and write parameters are handed to two write helpers:
    one that goes through the enrichment-study object and one that goes
    through the wr_tbl package. The two TSV files they produce must match.

    Args:
        log: Stream passed on to both write helpers (default: sys.stdout).

    Raises:
        AssertionError: If the contents of the two TSV files differ.
    """
    # 1. Gene Ontology Enrichment Analysis
    #    1a. Fetch the enrichment-study object and its results
    nature_data = get_goea_results()
    goeaobj = nature_data['goeaobj']
    goea_results = nature_data['goea_results']
    # 2. Write results
    #    Write parameters:
    #    The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt': prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt': {'p_fdr_bh': '{:8.2e}'},
        # Print a subset namedtuple fields, don't print all fields in namedtuple.
        'prt_flds': get_fmtflds(prtfmt),
    }
    #    2a. Use the write functions inside the GOEnrichmentStudy class.
    cwddir = os.getcwd()
    tsv_obj = os.path.join(cwddir, 'nbt3102_subset_obj.tsv')
    tsv_nts = os.path.join(cwddir, 'nbt3102_subset_nt.tsv')
    _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log)
    #    2b. Use the write functions from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log)
    # shallow=False forces a content comparison; the default (shallow=True)
    # accepts files as equal whenever their os.stat signatures match.
    assert filecmp.cmp(tsv_obj, tsv_nts, shallow=False), \
        "TSV files differ: {A} vs {B}".format(A=tsv_obj, B=tsv_nts)
Exemplo n.º 2
0
def test_wr_methods(log=sys.stdout):
    """Check that two write routes emit the same TSV for a subset of GOEA fields.

    The results and write parameters are given first to the helper that
    writes through the enrichment-study object, then to the helper that
    writes through the wr_tbl package. The two TSV files are then compared.

    Args:
        log: Stream passed on to both write helpers (default: sys.stdout).

    Raises:
        AssertionError: If the contents of the two TSV files differ.
    """
    # 1. Gene Ontology Enrichment Analysis
    #    1a. Fetch the enrichment-study object and its results
    nature_data = get_goea_results()
    goeaobj = nature_data['goeaobj']
    goea_results = nature_data['goea_results']
    # 2. Write results
    #    Write parameters:
    #    The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt': prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt': {
            'p_fdr_bh': '{:8.2e}'
        },
        # Print a subset namedtuple fields, don't print all fields in namedtuple.
        'prt_flds': get_fmtflds(prtfmt)
    }
    #    2a. Use the write functions inside the GOEnrichmentStudy class.
    cwddir = os.getcwd()
    tsv_obj = os.path.join(cwddir, 'nbt3102_subset_obj.tsv')
    tsv_nts = os.path.join(cwddir, 'nbt3102_subset_nt.tsv')
    _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log)
    #    2b. Use the write functions from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log)
    # Compare file contents, not just os.stat signatures (the shallow=True default),
    # so that same-sized files with matching timestamps cannot pass by accident.
    assert filecmp.cmp(tsv_obj, tsv_nts, shallow=False), \
        "TSV files differ: {A} vs {B}".format(A=tsv_obj, B=tsv_nts)
Exemplo n.º 3
0
def test_wr_methods(log=sys.stdout):
    """Run a GOEA, write a subset of result fields via two routes, and compare the files.

    A study is run with a filter that keeps results whose p_fdr_bh is below
    0.05. The results are then written by the helper that goes through the
    enrichment-study object and by the helper that goes through the wr_tbl
    package, and the two TSV files are compared.

    Args:
        log: Stream passed on to both write helpers (default: sys.stdout).

    Raises:
        AssertionError: If the contents of the two TSV files differ.
    """
    # 1. Gene Ontology Enrichment Analysis
    #    1a. Initialize: Load ontologies, associations, and population gene IDs
    taxid = 10090  # Mouse study
    geneids_pop = GeneID2nt_mus.keys()  # Mouse protein-coding genes
    goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid)
    #    1b. Run GOEA
    geneids_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")

    def keep_if(nt):
        """Keep a result only if its p_fdr_bh value is below 0.05."""
        return nt.p_fdr_bh < 0.05

    goea_results = goeaobj.run_study(geneids_study, keep_if=keep_if)
    # 2. Write results
    #    Write parameters:
    #    The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt': prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt': {'p_fdr_bh': '{:8.2e}'},
        # Print a subset namedtuple fields, don't print all fields in namedtuple.
        'prt_flds': get_fmtflds(prtfmt),
    }
    #    2a. Use the write functions inside the GOEnrichmentStudy class.
    _wr_3fmt_goeaobj(goea_results, goeaobj, wr_params, log)
    #    2b. Use the write functions straight from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(goea_results, wr_params, log)
    # NOTE(review): the file names below are presumably the ones the two helpers
    # write into the current directory -- confirm against the helpers.
    # shallow=False forces a content comparison; the default (shallow=True)
    # accepts files as equal whenever their os.stat signatures match.
    assert filecmp.cmp('nbt3102_subset_obj.tsv', 'nbt3102_subset_nt.tsv', shallow=False), \
        "TSV files differ: nbt3102_subset_obj.tsv vs nbt3102_subset_nt.tsv"
Exemplo n.º 4
0
 def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
     """Print GOEA results as text and return the printed records.

     Args:
         prt: Output target forwarded to RPT.prt_txt.
         goea_results: GOEA results to be printed.
         prtfmt: Format string with named fields; when None, a default
             showing GO, NS, p_uncorrected, study_count and name is used.
         **kws: Extra options forwarded to get_goea_nts_prt and RPT.prt_txt.

     Returns:
         The records produced by get_goea_nts_prt that were passed to the printer.
     """
     default_fmt = "{GO} {NS} {p_uncorrected:5.2e} {study_count:>5} {name}\n"
     fmt = self.adjust_prtfmt(default_fmt if prtfmt is None else prtfmt)
     # The field names to extract are taken from the (adjusted) format string
     fields = RPT.get_fmtflds(fmt)
     nts = get_goea_nts_prt(goea_results, fields, **kws)
     RPT.prt_txt(prt, nts, fmt, fields, **kws)
     return nts
Exemplo n.º 5
0
 def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
     """Write GOEA results in text form; hand back the records that were written.

     Args:
         prt: Output target forwarded to RPT.prt_txt.
         goea_results: GOEA results to be printed.
         prtfmt: Named-field format string. If None, the default pattern
             (GO, NS, p_uncorrected, study_count, name) is used.
         **kws: Extra options forwarded to get_goea_nts_prt and RPT.prt_txt.

     Returns:
         The records built by get_goea_nts_prt for the requested fields.
     """
     if prtfmt is not None:
         pattern = prtfmt
     else:
         pattern = "{GO} {NS} {p_uncorrected:5.2e} {study_count:>5} {name}\n"
     pattern = self.adjust_prtfmt(pattern)
     # Only the fields named in the adjusted pattern are requested
     fld_names = RPT.get_fmtflds(pattern)
     records = get_goea_nts_prt(goea_results, fld_names, **kws)
     RPT.prt_txt(prt, records, pattern, fld_names, **kws)
     return records
Exemplo n.º 6
0
 def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
     """Print GOEA results as text and return the printed records.

     Args:
         prt: Output target forwarded to RPT.prt_txt.
         goea_results: GOEA results to be printed.
         prtfmt: Format string with named fields; when None, a default
             covering GO, NS, p_uncorrected, ratio_in_study, ratio_in_pop,
             depth, name and study_items is used.
         **kws: Extra options forwarded to get_goea_nts_prt and RPT.prt_txt.

     Returns:
         The records produced by get_goea_nts_prt that were passed to the printer.
     """
     default_fmt = (
         "{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} "
         "{depth:02} {name:40} {study_items}\n"
     )
     fmt = self.adjust_prtfmt(default_fmt if prtfmt is None else prtfmt)
     # The field names to extract are taken from the (adjusted) format string
     fields = RPT.get_fmtflds(fmt)
     nts = get_goea_nts_prt(goea_results, fields, **kws)
     RPT.prt_txt(prt, nts, fmt, fields, **kws)
     return nts
Exemplo n.º 7
0
 def prt_txt(prt, goea_results, prtfmt=None, **kws):
     """Print GOEA results as text and return the printed records.

     Args:
         prt: Output target forwarded to RPT.prt_txt.
         goea_results: GOEA results, wrapped in MgrNtGOEAs to build the records.
         prtfmt: Format string with named fields; when None, PrtFmt builds
             one from a default list of field names.
         **kws: Extra options forwarded to get_goea_nts_prt and RPT.prt_txt.

     Returns:
         The records produced by MgrNtGOEAs.get_goea_nts_prt.
     """
     formatter = PrtFmt()
     if prtfmt is not None:
         fmt = prtfmt
     else:
         # No format given: ask the formatter for one covering the default fields
         fmt = formatter.get_prtfmt_str([
             'GO', 'NS', 'p_uncorrected',
             'ratio_in_study', 'ratio_in_pop', 'depth', 'name', 'study_items'])
     fmt = formatter.adjust_prtfmt(fmt)
     fields = RPT.get_fmtflds(fmt)
     nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(fields, **kws)
     RPT.prt_txt(prt, nts, fmt, fields, **kws)
     return nts
Exemplo n.º 8
0
 def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
     """Write GOEA results in text form; hand back the records that were written.

     Args:
         prt: Output target forwarded to RPT.prt_txt.
         goea_results: GOEA results, wrapped in MgrNtGOEAs to build the records.
         prtfmt: Named-field format string. If None, PrtFmt derives one
             from a default list of field names.
         **kws: Extra options forwarded to get_goea_nts_prt and RPT.prt_txt.

     Returns:
         The records produced by MgrNtGOEAs.get_goea_nts_prt.
     """
     formatter = PrtFmt()
     if prtfmt is not None:
         pattern = prtfmt
     else:
         # No format given: build one from the default field list
         default_flds = [
             'GO', 'NS', 'p_uncorrected', 'ratio_in_study',
             'ratio_in_pop', 'depth', 'name', 'study_items',
         ]
         pattern = formatter.get_prtfmt_str(default_flds)
     pattern = formatter.adjust_prtfmt(pattern)
     fld_names = RPT.get_fmtflds(pattern)
     records = MgrNtGOEAs(goea_results).get_goea_nts_prt(fld_names, **kws)
     RPT.prt_txt(prt, records, pattern, fld_names, **kws)
     return records
Exemplo n.º 9
0
 def prt_txt(self, prt, results_nt, prtfmt, **kws):
     """Print GOEA results as text using the given format string.

     Args:
         prt: Output target forwarded to RPT.prt_txt.
         results_nt: GOEA results handed to self._get_nts.
         prtfmt: Format string with named fields; adjusted by
             self.adjust_prtfmt before use.
         **kws: Extra options forwarded to self._get_nts and RPT.prt_txt.
     """
     fmt = self.adjust_prtfmt(prtfmt)
     # The field names to extract are taken from the (adjusted) format string
     fields = RPT.get_fmtflds(fmt)
     nts = self._get_nts(results_nt, fields, True, **kws)
     RPT.prt_txt(prt, nts, fmt, fields, **kws)
Exemplo n.º 10
0
 def prt_txt(self, prt, results_nt, prtfmt, **kws):
     """Write GOEA results in text form according to a named-field format string.

     Args:
         prt: Output target forwarded to RPT.prt_txt.
         results_nt: GOEA results handed to self._get_nts.
         prtfmt: Named-field format string; passed through
             self.adjust_prtfmt before anything else.
         **kws: Extra options forwarded to self._get_nts and RPT.prt_txt.
     """
     pattern = self.adjust_prtfmt(prtfmt)
     # Only the fields named in the adjusted pattern are requested
     fld_names = RPT.get_fmtflds(pattern)
     records = self._get_nts(results_nt, fld_names, True, **kws)
     RPT.prt_txt(prt, records, pattern, fld_names, **kws)