Пример #1
0
        def estimateEnrichmentOfTFBS(infiles, outfile):
            '''
            Test whether transcription factors are significantly associated
            with a foreground set of intervals relative to a background set.

            The two inputs in ``infiles`` may arrive in either order; each is
            recognised by "background" or "foreground" appearing in its
            path. The comparison itself is delegated to
            PipelineTFM.testSignificanceOfMatrices, which is also handed
            ``outfile``.
            '''
            input_names = " & ".join([os.path.basename(x) for x in infiles])
            E.info("Running Fisher's exact test for TF enrichment between %s" %
                   input_names)

            # name handed on as the match table argument
            results_table = "match_result"

            # the inputs are told apart by name rather than by position
            background_hits = [path for path in infiles
                               if re.search("background", path)]
            background = background_hits[0]
            foreground_hits = [path for path in infiles
                               if re.search("foreground", path)]
            foreground = foreground_hits[0]

            # hand over to the significance test
            PipelineTFM.testSignificanceOfMatrices(
                background,
                foreground,
                PARAMS["database"],
                results_table,
                outfile)

            E.info("Completed Fisher's exact test for "
                   "TF enrichment between %s" % input_names)
Пример #2
0
        def estimateEnrichmentOfTFBS(infiles, outfile):
            '''
            Run the transcription factor enrichment test for a foreground
            set of intervals against a background set.

            ``infiles`` contains the foreground and background files in no
            guaranteed order, so each one is picked out by the keyword in
            its path. Both are passed, with ``outfile``, to
            PipelineTFM.testSignificanceOfMatrices.
            '''
            def first_containing(keyword):
                # first input whose path contains the keyword
                return [f for f in infiles if re.search(keyword, f)][0]

            def describe_inputs():
                # base names of all inputs, for the log messages
                return " & ".join([os.path.basename(f) for f in infiles])

            E.info("Running Fisher's exact test for TF enrichment between %s" %
                   describe_inputs())

            # name handed on as the match table argument
            table = "match_result"

            # order of the inputs is unknown, so select them by name
            background = first_containing("background")
            foreground = first_containing("foreground")

            # significance testing is done by the pipeline module
            PipelineTFM.testSignificanceOfMatrices(
                background, foreground, PARAMS["database"], table, outfile)

            E.info("Completed Fisher's exact test for "
                   "TF enrichment between %s" % describe_inputs())
Пример #3
0
    def estimateEnrichmentOfTFBS(infiles, outfile):
        '''
        Estimate the significance of transcription factors that are associated
        with a foreground set of intervals vs a background set matched for
        sequence composition.

        infiles holds the background and foreground files in either order;
        they are recognised by "background" / "foreground" in their paths.
        The foreground input must look like <dir>/<name>.foreground.gc.tsv
        and is translated to <name>.foreground.tsv before being passed, with
        the background file and outfile, to
        PipelineTFM.testSignificanceOfMatrices.

        Raises IndexError if either input is missing from infiles and
        ValueError if the foreground path does not have the expected form.
        '''
        E.info("Running Fisher's exact test for TF enrichment between %s" %
               " & ".join([os.path.basename(x) for x in infiles]))

        # required files
        match_table = "match_result"

        # we don't know which order the foreground and background will come in
        background = [infile for infile in infiles if
                      re.search("background", infile)][0]
        foreground_gc = [infile for infile in infiles if
                         re.search("foreground", infile)][0]

        # raw string: "\." in a plain literal is an invalid escape sequence
        gc_match = re.match(r".+/(.+)\.foreground\.gc\.tsv", foreground_gc)
        if gc_match is None:
            # fail with the offending name rather than an AttributeError
            raise ValueError(
                "foreground file '%s' does not match "
                "<dir>/<name>.foreground.gc.tsv" % foreground_gc)
        foreground = "%s.foreground.tsv" % gc_match.group(1)

        # run significance testing
        PipelineTFM.testSignificanceOfMatrices(background,
                                               foreground,
                                               PARAMS["database"],
                                               match_table,
                                               outfile,
                                               PARAMS["genesets_header"])

        E.info("Completed Fisher's exact test for TF enrichment between %s" %
               " & ".join([os.path.basename(x) for x in infiles]))
Пример #4
0
    def estimateEnrichmentOfTFBS(infiles, outfile):
        '''
        Estimate the significance of transcription factors that are associated
        with a foreground set of intervals vs a background set matched for
        CpG content.

        infiles holds the background and foreground files in either order;
        they are recognised by "background" / "foreground" in their paths.
        The foreground input must look like <dir>/<name>.foreground.gc.tsv
        and is translated to <name>.foreground.tsv before being passed, with
        the background file and outfile, to
        PipelineTransfacMatch.testSignificanceOfMatrices.

        Raises IndexError if either input is missing from infiles and
        ValueError if the foreground path does not have the expected form.
        '''
        # required files
        match_table = "match_result"

        # we don't know which order the foreground and background will come in
        background = [
            infile for infile in infiles if re.search("background", infile)][0]
        foreground_gc = [
            infile for infile in infiles if re.search("foreground", infile)][0]

        # raw string: "\." in a plain literal is an invalid escape sequence
        gc_match = re.match(r".+/(.+)\.foreground\.gc\.tsv", foreground_gc)
        if gc_match is None:
            # fail with the offending name rather than an AttributeError
            raise ValueError(
                "foreground file '%s' does not match "
                "<dir>/<name>.foreground.gc.tsv" % foreground_gc)
        foreground = "%s.foreground.tsv" % gc_match.group(1)

        # run significance testing
        PipelineTransfacMatch.testSignificanceOfMatrices(
            background, foreground, PARAMS["database"], match_table, outfile)
Пример #5
0
    def estimateEnrichmentOfTFBS(infiles, outfile):
        '''
        Assess how significantly transcription factors are associated with a
        foreground set of intervals compared with a background set.

        The foreground and background files are picked out of infiles by the
        keyword in their paths and passed, with outfile, to
        PipelineTransfacMatch.testSignificanceOfMatrices.
        '''
        # the inputs are recognised by name because their order is not fixed
        background_files = list(
            filter(lambda name: re.search("background", name), infiles))
        background = background_files[0]
        foreground_files = list(
            filter(lambda name: re.search("foreground", name), infiles))
        foreground = foreground_files[0]

        # "match_result" is handed on as the match table argument
        PipelineTransfacMatch.testSignificanceOfMatrices(
            background,
            foreground,
            PARAMS["database"],
            "match_result",
            outfile)
Пример #6
0
        def estimateEnrichmentOfTFBS(infiles, outfile):
            '''
            Estimate the significance of transcription factors that are
            associated with a foreground set of intervals vs a background
            set matched for sequence composition.

            infiles holds the background and foreground files in either
            order; they are recognised by "background" / "foreground" in
            their paths. The foreground input must look like
            <dir>/<name>.foreground.gc.tsv and is translated to
            <name>.foreground.tsv before being passed on to
            PipelineTFM.testSignificanceOfMatrices together with the
            background file, outfile and the configured test direction.

            Raises IndexError if either input is missing from infiles and
            ValueError if the foreground path does not have the expected
            form.
            '''
            E.info("Running Fisher's exact test for TF enrichment between %s" %
                   " & ".join([os.path.basename(x) for x in infiles]))

            # required files
            match_table = "match_result"

            # we don't know which order the foreground and background
            # will come in
            background = [
                infile for infile in infiles
                if re.search("background", infile)
            ][0]
            foreground_gc = [
                infile for infile in infiles
                if re.search("foreground", infile)
            ][0]

            # raw string: "\." in a plain literal is an invalid escape
            # sequence
            gc_match = re.match(r".+/(.+)\.foreground\.gc\.tsv",
                                foreground_gc)
            if gc_match is None:
                # fail with the offending name rather than an AttributeError
                raise ValueError(
                    "foreground file '%s' does not match "
                    "<dir>/<name>.foreground.gc.tsv" % foreground_gc)
            foreground = "%s.foreground.tsv" % gc_match.group(1)

            # run significance testing
            # MM: added in directionality into FET - might only be looking for
            # enrichment OR depletion so don't want to hammer those p-values
            # too hard

            pval_direct = PARAMS['fisher_direction']

            PipelineTFM.testSignificanceOfMatrices(background, foreground,
                                                   PARAMS["database"],
                                                   match_table, outfile,
                                                   PARAMS["genesets_header"],
                                                   pval_direct)

            E.info("Completed Fisher's exact test for "
                   "TF enrichment between %s" %
                   " & ".join([os.path.basename(x) for x in infiles]))
Пример #7
0
        def estimateEnrichmentOfTFBS(infiles, outfile):
            '''
            Estimate the significance of transcription factors that are
            associated with a foreground set of intervals vs a background
            set matched for sequence composition.

            infiles holds the background and foreground files in either
            order; they are recognised by "background" / "foreground" in
            their paths. The foreground input must look like
            <dir>/<name>.foreground.gc.tsv and is translated to
            <name>.foreground.tsv before being passed on to
            PipelineTFM.testSignificanceOfMatrices together with the
            background file, outfile and the configured test direction.

            Raises IndexError if either input is missing from infiles and
            ValueError if the foreground path does not have the expected
            form.
            '''
            E.info("Running Fisher's exact test for TF enrichment between %s" %
                   " & ".join([os.path.basename(x) for x in infiles]))

            # required files
            match_table = "match_result"

            # we don't know which order the foreground and background
            # will come in
            background = [infile for infile in infiles if
                          re.search("background", infile)][0]
            foreground_gc = [infile for infile in infiles if
                             re.search("foreground", infile)][0]

            # raw string: "\." in a plain literal is an invalid escape
            # sequence
            gc_match = re.match(r".+/(.+)\.foreground\.gc\.tsv",
                                foreground_gc)
            if gc_match is None:
                # fail with the offending name rather than an AttributeError
                raise ValueError(
                    "foreground file '%s' does not match "
                    "<dir>/<name>.foreground.gc.tsv" % foreground_gc)
            foreground = "%s.foreground.tsv" % gc_match.group(1)

            # run significance testing
            # MM: added in directionality into FET - might only be looking for
            # enrichment OR depletion so don't want to hammer those p-values
            # too hard

            pval_direct = PARAMS['fisher_direction']

            PipelineTFM.testSignificanceOfMatrices(background,
                                                   foreground,
                                                   PARAMS["database"],
                                                   match_table,
                                                   outfile,
                                                   PARAMS["genesets_header"],
                                                   pval_direct)

            E.info("Completed Fisher's exact test for "
                   "TF enrichment between %s" %
                   " & ".join([os.path.basename(x) for x in infiles]))