Exemplo n.º 1
0
    # Always-on toggle around the per-group Foxo1 plots; set to False to skip.
    if True:
        # Every plot in this section receives the same output keyword args.
        foxo1_plot_opts = dict(save_dir=save_path, show_plot=False)

        for i, gp in enumerate(groups):
            # Rows at/above versus below min_thresh for the naive Foxo1 count.
            naive_foxo1_tags = gp['naive_foxo1_tag_count']
            naive_foxo1_split = [sum(naive_foxo1_tags >= min_thresh),
                                 sum(naive_foxo1_tags < min_thresh)]
            foxo1_group_label = labels[i]
            yzer.piechart(
                naive_foxo1_split,
                ['With Foxo1', 'Without Foxo1'],
                title='Co-occurrence with Foxo1- ' + foxo1_group_label,
                **foxo1_plot_opts)

            # Same threshold split for the LCMV d12 Foxo1 count.
            lcmv_foxo1_tags = gp['lcmv_d12_foxo1_tag_count']
            lcmv_foxo1_split = [sum(lcmv_foxo1_tags >= min_thresh),
                                sum(lcmv_foxo1_tags < min_thresh)]
            yzer.piechart(
                lcmv_foxo1_split,
                ['With Foxo1', 'Without Foxo1'],
                title=('Co-occurrence with LCMV d12 Foxo1- ' +
                       foxo1_group_label),
                **foxo1_plot_opts)

            # Distribution of the naive Foxo1 tag counts within this group.
            yzer.histogram(
                naive_foxo1_tags.tolist(),
                bins=20,
                title=('Foxo1 peak tag count distribution- ' +
                       foxo1_group_label),
                xlabel='Tag count in Foxo1 peak',
                ylabel='Number of peaks',
                **foxo1_plot_opts)

    # Always-on toggle for the cross-group Foxo1 box plot.
    if True:
        # One series per group, restricted to rows whose naive Foxo1 tag
        # count is greater than zero.
        foxo1_positive_counts = [
            gp[gp['naive_foxo1_tag_count'] > 0]['naive_foxo1_tag_count']
            for gp in groups
        ]
        yzer.boxplot(
            foxo1_positive_counts,
            labels,
            title='ATAC-seq regions with Foxo1 peaks by group',
            ylabel='Foxo1 peak tag count',
            save_dir=save_path,
            show_plot=False)

    # TCF1
    if True:
        for i, gp in enumerate(groups):
            yzer.piechart([sum(gp['tcf1_tag_count'] >= min_thresh),
Exemplo n.º 2
0
        # Null model: shuffle a copy of the domain ids in place and attach it
        # as an extra column, leaving the real 'domain_id' column untouched.
        shuffled = data['domain_id'].values.copy()
        shuffle(shuffled)
        data['shuffled_domain_id'] = shuffled

        # Column means per real domain and per shuffled domain.
        real_domain_groups = data.groupby(by='domain_id', as_index=False)
        null_domain_groups = data.groupby(by='shuffled_domain_id',
                                          as_index=False)
        grouped = real_domain_groups.mean()
        shuffled_grouped = null_domain_groups.mean()

        # Drop the rows for id 0 from both tables.
        # NOTE(review): presumably id 0 marks "no domain assigned" -- confirm.
        real_domain_mask = grouped['domain_id'] != 0
        null_domain_mask = shuffled_grouped['shuffled_domain_id'] != 0
        grouped = grouped[real_domain_mask]
        shuffled_grouped = shuffled_grouped[null_domain_mask]

        # First histogram layer: the observed per-domain means. The return
        # value is kept in `ax`; legend and display are switched off here.
        ax = yzer.histogram(
            grouped[dex_kla_key],
            bins=50,
            label='Replicate {0} Data'.format(rep),
            show_legend=False,
            show_plot=False)

        ax = yzer.histogram(
            shuffled_grouped[dex_kla_key],
            bins=50,
            title='Dex+KLA over KLA LFC {0} by HiC Domain'.format(rep),
            xlabel='Mean Dex+KLA log fold change for transcripts up in KLA',
            ylabel='Count of Domains',
            color='black',
            fill=False,
            label='Shuffled Data'.format(rep),
            show_legend=True,
            save_dir=img_dirpath,
            show_plot=False,
Exemplo n.º 3
0
        # Restrict both tables to rows whose 'up_in_kla' value is at least 1.
        # The explicit copy makes each filtered table independent, so the
        # column assignments below do not write into a slice of the parent
        # frame (which pandas reports as SettingWithCopyWarning).
        grouped = grouped[grouped['up_in_kla'] >= 1].copy()
        shuffled_grouped = shuffled_grouped[
            shuffled_grouped['up_in_kla'] >= 1].copy()

        # kla_ratio = up_in_kla / count for each row. Rows are then ordered
        # by ascending ratio and 'idx' stores the resulting 0-based position.
        # DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
        # sort_values() is its replacement and has the same default ordering.
        grouped['kla_ratio'] = grouped['up_in_kla'] / grouped['count']
        grouped = grouped.sort_values(['kla_ratio']).reset_index(drop=True)
        grouped['idx'] = grouped.index

        # Same ratio, ordering and rank column for the shuffled table.
        shuffled_grouped['kla_ratio'] = (
            shuffled_grouped['up_in_kla'] / shuffled_grouped['count'])
        shuffled_grouped = shuffled_grouped.sort_values(
            ['kla_ratio']).reset_index(drop=True)
        shuffled_grouped['idx'] = shuffled_grouped.index

        # First histogram layer: the observed ratios. The return value is
        # kept in `ax`; legend and display are switched off here.
        ax = yzer.histogram(grouped['kla_ratio'],
                            bins=50,
                            label='Replicate {0} Data'.format(rep),
                            show_legend=False,
                            show_plot=False)

        ax = yzer.histogram(
            shuffled_grouped['kla_ratio'],
            bins=50,
            title='Percent of transcripts up in KLA {0} by HiC Domain'.format(
                rep),
            ylabel='Count of domains',
            xlabel='Percent of transcripts up in KLA',
            color='black',
            fill=False,
            label='Shuffled Data'.format(rep),
            show_legend=True,
            save_dir=img_dirpath,
Exemplo n.º 4
0
            print 'Percentage: ', len(subset) / len(refseq)
            print 'mRNA: ', sum(subset['type'] == 'mRNA'), sum(
                subset['type'] == 'mRNA') / len(subset)
            print 'rRNA: ', sum(subset['type'] != 'mRNA'), sum(
                subset['type'] != 'mRNA') / len(subset)
        # Keep rows that are mRNA, flagged refseq == 't', and have
        # percent_covered below 1.5. Some transcripts encompass two very
        # close refseq genes; those are filtered out for now.
        with_runoff_keep = (
            (refseq_with_runoff['type'] == 'mRNA')
            & (refseq_with_runoff['refseq'] == 't')
            & (refseq_with_runoff['percent_covered'] < 1.5))
        mrna_with_runoff = refseq_with_runoff[with_runoff_keep]

        # Identical filter applied to the no-runoff table.
        no_runoff_keep = (
            (refseq_no_runoff['type'] == 'mRNA')
            & (refseq_no_runoff['refseq'] == 't')
            & (refseq_no_runoff['percent_covered'] < 1.5))
        mrna_no_runoff = refseq_no_runoff[no_runoff_keep]
        # Disabled diagnostics: the guard is False, so nothing below runs
        # until it is flipped to True.
        if False:
            # Coverage histograms, with-runoff first and then no-runoff.
            for _runoff_frame in (mrna_with_runoff, mrna_no_runoff):
                yzer.histogram(_runoff_frame['percent_covered'])

            # Number of rows per 'relationship' value in each table.
            relationships = ['is contained by', 'contains', 'overlaps with']
            with_runoff_counts = [sum(mrna_with_runoff['relationship'] == rel)
                                  for rel in relationships]
            no_runoff_counts = [sum(mrna_no_runoff['relationship'] == rel)
                                for rel in relationships]

            # One pie chart per table, in the same order as the histograms.
            for _rel_counts in (with_runoff_counts, no_runoff_counts):
                yzer.piechart(_rel_counts, labels=relationships)

    if True:
Exemplo n.º 5
0
            dirpath, 'Figures', 'me2_atac_overlaps')

        # Pie chart of the ATAC-only versus ATAC-with-H3K4me2 set sizes.
        atac_overlap_sizes = [len(atac_only), len(atac_me2)]
        yzer.piechart(atac_overlap_sizes,
                      ['ATAC only', 'ATAC with H3K4me2'],
                      title='ATAC-seq region overlaps',
                      save_dir=save_path)

        # Pie chart of the H3K4me2-only versus H3K4me2-with-ATAC set sizes.
        me2_overlap_sizes = [len(me2_only), len(me2_atac)]
        yzer.piechart(me2_overlap_sizes,
                      ['H3K4me2 only', 'H3K4me2 with ATAC'],
                      title='H3K4me2 overlaps',
                      save_dir=save_path)

        # Both box plots share axis labels and the output directory.
        overlap_box_opts = dict(xlabel='Group', ylabel='Peak tag count',
                                save_dir=save_path)
        yzer.boxplot([atac_only['tag_count'], atac_me2['tag_count']],
                     ['ATAC only', 'ATAC with H3K4me2'],
                     title='ATAC-seq tag counts by H3K4me2 overlap',
                     **overlap_box_opts)
        yzer.boxplot([me2_only['tag_count'], me2_atac['tag_count']],
                     ['H3K4me2 only', 'H3K4me2 with ATAC'],
                     title='H3K4me2 tag counts by ATAC-seq overlap',
                     **overlap_box_opts)

        # Both histograms share binning, axis labels and output directory.
        overlap_hist_opts = dict(bins=20,
                                 xlabel='Tag count in peak',
                                 ylabel='Number of peaks',
                                 save_dir=save_path)
        yzer.histogram(atac_only['tag_count'].tolist(),
                       title='ATAC-seq-only peak tag count distribution',
                       **overlap_hist_opts)
        yzer.histogram(me2_only['tag_count'].tolist(),
                       title='H3K4me2-only peak tag count distribution',
                       **overlap_hist_opts)