def plot_snp_dens_distrib(variations, window_size, data_dir, write_bg=False):
    # Calculate and plot variations density distribution
    density = calc_snp_density(variations, window_size)
    density_distrib, bins = histogram(density, 20)
    fpath = join(data_dir, 'snps_density.png')
    title = 'SNP density distribution per {} bp windows'.format(window_size)
    plot_distrib(density_distrib, bins, fhand=open(fpath, 'w'), color='c',
                 mpl_params={'set_xlabel': {'args': ['SNP density'],
                                            'kwargs': {}},
                             'set_ylabel': {'args': ['SNP number'],
                                            'kwargs': {}},
                             'set_title': {'args': [title], 'kwargs': {}},
                             'set_yscale': {'args': ['log'], 'kwargs': {}}})

    # Manhattan plot for SNP density
    fpath = join(data_dir, 'snps_density_manhattan.png')
    fhand = open(fpath, 'w')
    title = 'SNP denisity along the genome'
    chrom = _load_matrix(variations, CHROM_FIELD)
    pos = _load_matrix(variations, POS_FIELD)
    manhattan_plot(chrom, pos, density,
                   mpl_params={'set_xlabel': {'args': ['Chromosome'],
                                              'kwargs': {}},
                               'set_ylabel': {'args': ['SNP per {} bp'.format(window_size)],
                                              'kwargs': {}},
                               'set_title': {'args': [title], 'kwargs': {}}},
                   fhand=fhand, figsize=(15, 7.5), ylim=1)
    
    # Save in bedgraph format
    if write_bg:
        bg_fhand = open(join(data_dir, 'snp_density.bg'), 'w')
        pos_dens = PositionalStatsCalculator(chrom, pos, density)
        pos_dens.write(bg_fhand, 'snp_density',
                       'SNP number in {} bp around'.format(window_size),
                       track_type='bedgraph')
示例#2
0
    def test_manhattan_plot(self):

        assert _look_for_first_different([1, 1, 3], 0) == 2
        assert _look_for_first_different([1, 1, 3], 1) == 2
        assert _look_for_first_different([1, 1, 3], 2) == 3
        assert _look_for_first_different([1, 1, 1], 0) == 3

        chrom = numpy.array([b"chr1"] * 3 + [b"chr2"] * 3 + [b"chr3"] * 3)
        pos = numpy.array([1, 2, 3, 2, 5, 10, 1, 2, 3])
        statistic = numpy.array([2, 3, 2, 5, 3, 1, 3, 4, 2])
        with NamedTemporaryFile(suffix=".png") as fhand:
            manhattan_plot(chrom, pos, statistic, fhand=fhand, figsize=(10, 10))

        with NamedTemporaryFile(suffix=".png") as fhand:
            manhattan_plot(chrom, pos, statistic, fhand=fhand, split_by_chrom=True)
def plot_inbreeding_coefficient(variations, max_num_allele,  data_dir,
                                window_size, chunk_size=SNPS_PER_CHUNK,
                                min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
                                write_bg=False, calc_genome_wise=False):
    # Calculate Inbreeding coefficient distribution
    inbreed_coef = calc_inbreeding_coef(variations, chunk_size=chunk_size,
                                        min_num_genotypes=min_num_genotypes)
    ic_distrib, bins = histogram(inbreed_coef, 50, range_=(-1, 1))
      
    fpath = join(data_dir, 'inbreeding_coef_distribution.png')
    fhand = open(fpath, 'w')
    title = 'Inbreeding coefficient distribution all samples'
    plot_distrib(ic_distrib, bins, fhand=fhand,
                 mpl_params={'set_xlabel': {'args': ['Inbreeding coefficient'],
                                            'kwargs': {}},
                             'set_ylabel': {'args': ['Number of SNPs'],
                                            'kwargs': {}},
                             'set_title': {'args': [title], 'kwargs': {}},
                             'set_xlim': {'args': [-1, 1], 'kwargs': {}}})
    
    # Save in bedgraph file
    if calc_genome_wise:
        bg_fhand = open(join(data_dir, 'ic.bg'), 'w')
        chrom = _load_matrix(variations, CHROM_FIELD)
        pos = _load_matrix(variations, POS_FIELD)
        pos_ic = PositionalStatsCalculator(chrom, pos, inbreed_coef)
        if write_bg:
            pos_ic.write(bg_fhand, 'IC', 'Inbreeding coefficient',
                              track_type='bedgraph')
        
        # Plot Ic along genome taking sliding windows
        pos_ic = pos_ic.calc_window_stat()
        chrom, pos, ic_windows = pos_ic.chrom, pos_ic.pos, pos_ic.stat 
        fpath = join(data_dir, 'ic_manhattan.png')
        fhand = open(fpath, 'w')
        title = 'Inbreeding coefficient (IC) along the genome'
        manhattan_plot(chrom, pos, ic_windows, fhand=fhand, figsize=(15, 7.5),
                       ylim=-1,
                       mpl_params={'set_xlabel': {'args': ['Chromosome'],
                                                'kwargs': {}},
                                 'set_ylabel': {'args': ['IC'],
                                                'kwargs': {}},
                                 'set_title': {'args': [title], 'kwargs': {}}})
def plot_maf(variations, data_dir, chunk_size=SNPS_PER_CHUNK, window_size=None,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT, write_bg=False,
             calc_genome_wise=False):
    # Calculate and plot MAF distribution
    mafs = calc_maf(variations, min_num_genotypes, chunk_size)
    maf_distrib, bins = histogram(mafs, n_bins=25, range_=(0, 1))
    
    fpath = join(data_dir, 'mafs.png')
    title = 'Maximum allele frequency (MAF) distribution'
    plot_distrib(maf_distrib, bins=bins, fhand=open(fpath, 'w'), color='c',
                   mpl_params={'set_xlabel': {'args': ['MAF'], 'kwargs': {}},
                               'set_ylabel': {'args': ['SNP number'],
                                              'kwargs': {}},
                               'set_title': {'args': [title], 'kwargs': {}}})

    # Write bedgraph file
    if calc_genome_wise:
        chrom = _load_matrix(variations, CHROM_FIELD)
        pos = _load_matrix(variations, POS_FIELD) 
        bg_fhand = open(join(data_dir, 'maf.bg'), 'w')
        pos_maf = PositionalStatsCalculator(chrom, pos, mafs,
                                            window_size=window_size,
                                            step=window_size)
        if write_bg:
            pos_maf.write(bg_fhand, 'MAF', 'Maximum allele frequency',
                          track_type='bedgraph')
        if window_size is not None:
            pos_maf = pos_maf.calc_window_stat()
        
    
        # Manhattan plot for MAF along genome
        fpath = join(data_dir, 'maf_manhattan.png')
        fhand = open(fpath, 'w')
        title = 'Max Allele Freq (MAF) along the genome'
        chrom, pos, mafs = pos_maf.chrom, pos_maf.pos, pos_maf.stat
        mpl_params = {'set_xlabel': {'args': ['Chromosome'], 'kwargs': {}},
                      'set_ylabel': {'args': ['MAF'],'kwargs': {}},
                      'set_title': {'args': [title], 'kwargs': {}}}
        manhattan_plot(chrom, pos, mafs, mpl_params=mpl_params,
                       fhand=fhand, figsize=(15, 7.5))
def plot_r2(variations, window_size, data_dir, write_bg=False):
    
    # Calculate LD r2 parameter in windows
    chrom, pos, r2 = calc_r2_windows(variations, window_size=window_size)
    
    # Plot r2 distribution
    fpath = os.path.join(data_dir, 'r2_distrib.png')
    distrib, bins = histogram(r2, n_bins=50, range_=(0, 1))
    title = 'r2 distribution in windows of {} bp'.format(window_size)
    mpl_params={'set_xlabel': {'args': ['r2'], 'kwargs': {}},
                'set_ylabel': {'args': ['Number of windows'], 'kwargs': {}},
                'set_title': {'args': [title], 'kwargs': {}}}
    plot_distrib(distrib, bins, fhand=open(fpath, 'w'), figsize=(7, 7),
                 mpl_params=mpl_params)
    
    # Manhattan plot
    mask = numpy.logical_not(numpy.isnan(r2))
    chrom = chrom[mask]
    pos = pos[mask]
    r2 = r2[mask]
    fpath = os.path.join(data_dir, 'r2_manhattan.png')
    title = 'r2 along genome in windows of {} bp'.format(window_size)
    mpl_params={'set_xlabel': {'args': ['Chromosome'], 'kwargs': {}},
                'set_ylabel': {'args': ['r2'], 'kwargs': {}},
                'set_title': {'args': [title], 'kwargs': {}}}
    manhattan_plot(chrom, pos, r2, fhand=open(fpath, 'w'), figsize=(15, 7),
                   marker='k', mpl_params=mpl_params)
    
    # Write bg
    if write_bg:
        fpath = os.path.join(data_dir, 'r2_windows_{}.png'.format(window_size))
        bg_fhand = open(fpath, 'w')
        pos_r2 = PositionalStatsCalculator(chrom, pos, r2,
                                           window_size=window_size,
                                           step=window_size,
                                           take_windows=False)
        description = 'mean r2 in windows of {} bp'.format(window_size)
        pos_r2.write(bg_fhand, 'r2', description, track_type='bedgraph')
示例#6
0
    def test_manhattan_plot(self):

        assert _look_for_first_different([1, 1, 3], 0) == 2
        assert _look_for_first_different([1, 1, 3], 1) == 2
        assert _look_for_first_different([1, 1, 3], 2) == 3
        assert _look_for_first_different([1, 1, 1], 0) == 3

        chrom = numpy.array([b'chr1'] * 3 + [b'chr2'] * 3 + [b'chr3'] * 3)
        pos = numpy.array([1, 2, 3, 2, 5, 10, 1, 2, 3])
        statistic = numpy.array([2, 3, 2, 5, 3, 1, 3, 4, 2])
        with NamedTemporaryFile(suffix='.png') as fhand:
            manhattan_plot(chrom,
                           pos,
                           statistic,
                           fhand=fhand,
                           figsize=(10, 10))

        with NamedTemporaryFile(suffix='.png') as fhand:
            manhattan_plot(chrom,
                           pos,
                           statistic,
                           fhand=fhand,
                           split_by_chrom=True)
def plot_nucleotide_diversity_measures(variations, max_num_alleles,
                                       window_size, data_dir,
                                       chunk_size=SNPS_PER_CHUNK,
                                       write_bg=False,
                                       min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    fig = Figure(figsize=(20, 20))
    canvas = FigureCanvas(fig)
    marker = 'k'
    chrom = _load_matrix(variations, CHROM_FIELD)
    pos = _load_matrix(variations, POS_FIELD)

    # Number of variable positions per bp
    snp_density = PositionalStatsCalculator(chrom, pos,
                                            numpy.ones(pos.shape),
                                            window_size=window_size,
                                            step=window_size)
    snp_density = snp_density.calc_window_stat()
    bg_fhand = open(join(data_dir, 'diversity_s.bg'), 'w')
    if write_bg:
        snp_density.write(bg_fhand, 's',
                          'SNP density in windows of {} bp'.format(window_size),
                          track_type='bedgraph')
    axes = fig.add_subplot(311)
    title = 'Nucleotide diversity measures averaged in windows of {} bp'
    title = title.format(window_size)
    mpl_params = {'set_title': {'args': [title], 'kwargs': {}},
                  'set_ylabel': {'args': ['SNPs number / bp'], 'kwargs': {}},
                  'set_ylim': {'args': [0, 1.2*numpy.max(snp_density.stat)],
                               'kwargs': {}}}
    manhattan_plot(snp_density.chrom, snp_density.pos, snp_density.stat,
                   mpl_params=mpl_params, axes=axes, ylim=0, show_chroms=False,
                   marker=marker)

    # Watterson estimator of nucleotide diversity
    n_seqs = variations[GT_FIELD].shape[1] * variations[GT_FIELD].shape[2]
    correction_factor = numpy.sum(1 / numpy.arange(1, n_seqs))
    watterson = snp_density
    watterson.stat = watterson.stat / correction_factor
    bg_fhand = open(join(data_dir, 'diversity_s.bg'), 'w')
    description = 'SNP density in windows of {} bp'.format(window_size)
    if write_bg:
        watterson.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(312)
    mpl_params={'set_ylabel': {'args': ['Watterson estimator'], 'kwargs': {}},
                'set_ylim': {'args': [0, 1.2*numpy.max(watterson.stat)],
                             'kwargs': {}}}
    manhattan_plot(watterson.chrom, watterson.pos, watterson.stat,
                   mpl_params=mpl_params, axes=axes, ylim=0, show_chroms=False,
                   marker=marker)

    # Expected heterozygosity (Pi)
    exp_het = calc_expected_het(variations, chunk_size=chunk_size,
                                min_num_genotypes=min_num_genotypes)
    pi = PositionalStatsCalculator(chrom, pos, exp_het,
                                   window_size=window_size, step=window_size)
    pi = pi.calc_window_stat()
    bg_fhand = open(join(data_dir, 'diversity_pi.bg'), 'w')
    description = 'Pi in windows of {} bp'.format(window_size)
    if write_bg:
        pi.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(313)
    mpl_params={'set_xlabel': {'args': ['Chromosome'], 'kwargs': {}},
                'set_ylabel': {'args': ['Pi'], 'kwargs': {}},
                'set_ylim': {'args': [0, 1.2*numpy.max(pi.stat)],
                             'kwargs': {}}}
    manhattan_plot(pi.chrom, pi.pos, pi.stat, axes=axes, ylim=0, marker=marker,
                   mpl_params=mpl_params)
    canvas.print_figure(open(join(data_dir, 'nucleotide_diversity.png'), 'w'))
示例#8
0
def plot_nucleotide_diversity_measures(
        variations,
        max_num_alleles,
        window_size,
        data_dir,
        chunk_size=SNPS_PER_CHUNK,
        write_bg=False,
        min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    fig = Figure(figsize=(20, 20))
    canvas = FigureCanvas(fig)
    marker = 'k'
    chrom = _load_matrix(variations, CHROM_FIELD)
    pos = _load_matrix(variations, POS_FIELD)

    # Number of variable positions per bp
    snp_density = PositionalStatsCalculator(chrom,
                                            pos,
                                            numpy.ones(pos.shape),
                                            window_size=window_size,
                                            step=window_size)
    snp_density = snp_density.calc_window_stat()
    bg_fhand = open(join(data_dir, 'diversity_s.bg'), 'w')
    if write_bg:
        snp_density.write(
            bg_fhand,
            's',
            'SNP density in windows of {} bp'.format(window_size),
            track_type='bedgraph')
    axes = fig.add_subplot(311)
    title = 'Nucleotide diversity measures averaged in windows of {} bp'
    title = title.format(window_size)
    mpl_params = {
        'set_title': {
            'args': [title],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['SNPs number / bp'],
            'kwargs': {}
        },
        'set_ylim': {
            'args': [0, 1.2 * numpy.max(snp_density.stat)],
            'kwargs': {}
        }
    }
    manhattan_plot(snp_density.chrom,
                   snp_density.pos,
                   snp_density.stat,
                   mpl_params=mpl_params,
                   axes=axes,
                   ylim=0,
                   show_chroms=False,
                   marker=marker)

    # Watterson estimator of nucleotide diversity
    n_seqs = variations[GT_FIELD].shape[1] * variations[GT_FIELD].shape[2]
    correction_factor = numpy.sum(1 / numpy.arange(1, n_seqs))
    watterson = snp_density
    watterson.stat = watterson.stat / correction_factor
    bg_fhand = open(join(data_dir, 'diversity_s.bg'), 'w')
    description = 'SNP density in windows of {} bp'.format(window_size)
    if write_bg:
        watterson.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(312)
    mpl_params = {
        'set_ylabel': {
            'args': ['Watterson estimator'],
            'kwargs': {}
        },
        'set_ylim': {
            'args': [0, 1.2 * numpy.max(watterson.stat)],
            'kwargs': {}
        }
    }
    manhattan_plot(watterson.chrom,
                   watterson.pos,
                   watterson.stat,
                   mpl_params=mpl_params,
                   axes=axes,
                   ylim=0,
                   show_chroms=False,
                   marker=marker)

    # Expected heterozygosity (Pi)
    exp_het = calc_expected_het(variations,
                                chunk_size=chunk_size,
                                min_num_genotypes=min_num_genotypes)
    pi = PositionalStatsCalculator(chrom,
                                   pos,
                                   exp_het,
                                   window_size=window_size,
                                   step=window_size)
    pi = pi.calc_window_stat()
    bg_fhand = open(join(data_dir, 'diversity_pi.bg'), 'w')
    description = 'Pi in windows of {} bp'.format(window_size)
    if write_bg:
        pi.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(313)
    mpl_params = {
        'set_xlabel': {
            'args': ['Chromosome'],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['Pi'],
            'kwargs': {}
        },
        'set_ylim': {
            'args': [0, 1.2 * numpy.max(pi.stat)],
            'kwargs': {}
        }
    }
    manhattan_plot(pi.chrom,
                   pi.pos,
                   pi.stat,
                   axes=axes,
                   ylim=0,
                   marker=marker,
                   mpl_params=mpl_params)
    canvas.print_figure(open(join(data_dir, 'nucleotide_diversity.png'), 'w'))
示例#9
0
def plot_inbreeding_coefficient(
        variations,
        max_num_allele,
        data_dir,
        window_size,
        chunk_size=SNPS_PER_CHUNK,
        min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
        write_bg=False,
        calc_genome_wise=False):
    # Calculate Inbreeding coefficient distribution
    inbreed_coef = calc_inbreeding_coef(variations,
                                        chunk_size=chunk_size,
                                        min_num_genotypes=min_num_genotypes)
    ic_distrib, bins = histogram(inbreed_coef, 50, range_=(-1, 1))

    fpath = join(data_dir, 'inbreeding_coef_distribution.png')
    fhand = open(fpath, 'w')
    title = 'Inbreeding coefficient distribution all samples'
    plot_distrib(ic_distrib,
                 bins,
                 fhand=fhand,
                 mpl_params={
                     'set_xlabel': {
                         'args': ['Inbreeding coefficient'],
                         'kwargs': {}
                     },
                     'set_ylabel': {
                         'args': ['Number of SNPs'],
                         'kwargs': {}
                     },
                     'set_title': {
                         'args': [title],
                         'kwargs': {}
                     },
                     'set_xlim': {
                         'args': [-1, 1],
                         'kwargs': {}
                     }
                 })

    # Save in bedgraph file
    if calc_genome_wise:
        bg_fhand = open(join(data_dir, 'ic.bg'), 'w')
        chrom = _load_matrix(variations, CHROM_FIELD)
        pos = _load_matrix(variations, POS_FIELD)
        pos_ic = PositionalStatsCalculator(chrom, pos, inbreed_coef)
        if write_bg:
            pos_ic.write(bg_fhand,
                         'IC',
                         'Inbreeding coefficient',
                         track_type='bedgraph')

        # Plot Ic along genome taking sliding windows
        pos_ic = pos_ic.calc_window_stat()
        chrom, pos, ic_windows = pos_ic.chrom, pos_ic.pos, pos_ic.stat
        fpath = join(data_dir, 'ic_manhattan.png')
        fhand = open(fpath, 'w')
        title = 'Inbreeding coefficient (IC) along the genome'
        manhattan_plot(chrom,
                       pos,
                       ic_windows,
                       fhand=fhand,
                       figsize=(15, 7.5),
                       ylim=-1,
                       mpl_params={
                           'set_xlabel': {
                               'args': ['Chromosome'],
                               'kwargs': {}
                           },
                           'set_ylabel': {
                               'args': ['IC'],
                               'kwargs': {}
                           },
                           'set_title': {
                               'args': [title],
                               'kwargs': {}
                           }
                       })
示例#10
0
def plot_snp_dens_distrib(variations, window_size, data_dir, write_bg=False):
    # Calculate and plot variations density distribution
    density = calc_snp_density(variations, window_size)
    density_distrib, bins = histogram(density, 20)
    fpath = join(data_dir, 'snps_density.png')
    title = 'SNP density distribution per {} bp windows'.format(window_size)
    plot_distrib(density_distrib,
                 bins,
                 fhand=open(fpath, 'w'),
                 color='c',
                 mpl_params={
                     'set_xlabel': {
                         'args': ['SNP density'],
                         'kwargs': {}
                     },
                     'set_ylabel': {
                         'args': ['SNP number'],
                         'kwargs': {}
                     },
                     'set_title': {
                         'args': [title],
                         'kwargs': {}
                     },
                     'set_yscale': {
                         'args': ['log'],
                         'kwargs': {}
                     }
                 })

    # Manhattan plot for SNP density
    fpath = join(data_dir, 'snps_density_manhattan.png')
    fhand = open(fpath, 'w')
    title = 'SNP denisity along the genome'
    chrom = _load_matrix(variations, CHROM_FIELD)
    pos = _load_matrix(variations, POS_FIELD)
    manhattan_plot(chrom,
                   pos,
                   density,
                   mpl_params={
                       'set_xlabel': {
                           'args': ['Chromosome'],
                           'kwargs': {}
                       },
                       'set_ylabel': {
                           'args': ['SNP per {} bp'.format(window_size)],
                           'kwargs': {}
                       },
                       'set_title': {
                           'args': [title],
                           'kwargs': {}
                       }
                   },
                   fhand=fhand,
                   figsize=(15, 7.5),
                   ylim=1)

    # Save in bedgraph format
    if write_bg:
        bg_fhand = open(join(data_dir, 'snp_density.bg'), 'w')
        pos_dens = PositionalStatsCalculator(chrom, pos, density)
        pos_dens.write(bg_fhand,
                       'snp_density',
                       'SNP number in {} bp around'.format(window_size),
                       track_type='bedgraph')
示例#11
0
def plot_maf(variations,
             data_dir,
             chunk_size=SNPS_PER_CHUNK,
             window_size=None,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
             write_bg=False,
             calc_genome_wise=False):
    # Calculate and plot MAF distribution
    mafs = calc_maf(variations, min_num_genotypes, chunk_size)
    maf_distrib, bins = histogram(mafs, n_bins=25, range_=(0, 1))

    fpath = join(data_dir, 'mafs.png')
    title = 'Maximum allele frequency (MAF) distribution'
    plot_distrib(maf_distrib,
                 bins=bins,
                 fhand=open(fpath, 'w'),
                 color='c',
                 mpl_params={
                     'set_xlabel': {
                         'args': ['MAF'],
                         'kwargs': {}
                     },
                     'set_ylabel': {
                         'args': ['SNP number'],
                         'kwargs': {}
                     },
                     'set_title': {
                         'args': [title],
                         'kwargs': {}
                     }
                 })

    # Write bedgraph file
    if calc_genome_wise:
        chrom = _load_matrix(variations, CHROM_FIELD)
        pos = _load_matrix(variations, POS_FIELD)
        bg_fhand = open(join(data_dir, 'maf.bg'), 'w')
        pos_maf = PositionalStatsCalculator(chrom,
                                            pos,
                                            mafs,
                                            window_size=window_size,
                                            step=window_size)
        if write_bg:
            pos_maf.write(bg_fhand,
                          'MAF',
                          'Maximum allele frequency',
                          track_type='bedgraph')
        if window_size is not None:
            pos_maf = pos_maf.calc_window_stat()

        # Manhattan plot for MAF along genome
        fpath = join(data_dir, 'maf_manhattan.png')
        fhand = open(fpath, 'w')
        title = 'Max Allele Freq (MAF) along the genome'
        chrom, pos, mafs = pos_maf.chrom, pos_maf.pos, pos_maf.stat
        mpl_params = {
            'set_xlabel': {
                'args': ['Chromosome'],
                'kwargs': {}
            },
            'set_ylabel': {
                'args': ['MAF'],
                'kwargs': {}
            },
            'set_title': {
                'args': [title],
                'kwargs': {}
            }
        }
        manhattan_plot(chrom,
                       pos,
                       mafs,
                       mpl_params=mpl_params,
                       fhand=fhand,
                       figsize=(15, 7.5))
示例#12
0
def plot_r2(variations, window_size, data_dir, write_bg=False):

    # Calculate LD r2 parameter in windows
    chrom, pos, r2 = calc_r2_windows(variations, window_size=window_size)

    # Plot r2 distribution
    fpath = os.path.join(data_dir, 'r2_distrib.png')
    distrib, bins = histogram(r2, n_bins=50, range_=(0, 1))
    title = 'r2 distribution in windows of {} bp'.format(window_size)
    mpl_params = {
        'set_xlabel': {
            'args': ['r2'],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['Number of windows'],
            'kwargs': {}
        },
        'set_title': {
            'args': [title],
            'kwargs': {}
        }
    }
    plot_distrib(distrib,
                 bins,
                 fhand=open(fpath, 'w'),
                 figsize=(7, 7),
                 mpl_params=mpl_params)

    # Manhattan plot
    mask = numpy.logical_not(numpy.isnan(r2))
    chrom = chrom[mask]
    pos = pos[mask]
    r2 = r2[mask]
    fpath = os.path.join(data_dir, 'r2_manhattan.png')
    title = 'r2 along genome in windows of {} bp'.format(window_size)
    mpl_params = {
        'set_xlabel': {
            'args': ['Chromosome'],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['r2'],
            'kwargs': {}
        },
        'set_title': {
            'args': [title],
            'kwargs': {}
        }
    }
    manhattan_plot(chrom,
                   pos,
                   r2,
                   fhand=open(fpath, 'w'),
                   figsize=(15, 7),
                   marker='k',
                   mpl_params=mpl_params)

    # Write bg
    if write_bg:
        fpath = os.path.join(data_dir, 'r2_windows_{}.png'.format(window_size))
        bg_fhand = open(fpath, 'w')
        pos_r2 = PositionalStatsCalculator(chrom,
                                           pos,
                                           r2,
                                           window_size=window_size,
                                           step=window_size,
                                           take_windows=False)
        description = 'mean r2 in windows of {} bp'.format(window_size)
        pos_r2.write(bg_fhand, 'r2', description, track_type='bedgraph')