Пример #1
0
def plot_hwe(variations, max_num_alleles, data_dir, ploidy=2,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
             chunk_size=SNPS_PER_CHUNK):
    """Plot observed and expected chi2 distributions of the HWE test.

    One subplot is drawn for every allele number from 2 up to
    ``max_num_alleles`` (inclusive). Each subplot shows a 50-bin histogram
    of the first column of ``calc_hwe_chi2_test``'s result, with the
    theoretical chi2 density overlaid on a twin y axis. The figure is saved
    as ``hwe_chi2_distrib.png`` inside ``data_dir``.

    Args:
        variations: variations container handed to ``calc_hwe_chi2_test``.
        max_num_alleles: largest number of alleles to plot.
        data_dir: directory in which the PNG is written.
        ploidy: ploidy used to count the possible genotypes.
        min_num_genotypes: forwarded to ``calc_hwe_chi2_test``.
        chunk_size: forwarded to ``calc_hwe_chi2_test``.
    """
    fig = Figure(figsize=(10, 20))
    canvas = FigureCanvas(fig)

    num_alleles = range(2, max_num_alleles + 1)
    gs = gridspec.GridSpec(len(num_alleles), 1)
    for i, num_allele in enumerate(num_alleles):
        # Degrees of freedom: number of distinct genotypes that can be built
        # with num_allele alleles minus the number of alleles.
        n_genotypes = sum(
            1 for _ in combinations_with_replacement(range(num_allele),
                                                     ploidy))
        df = n_genotypes - num_allele

        hwe_test = calc_hwe_chi2_test(variations, num_allele=num_allele,
                                      min_num_genotypes=min_num_genotypes,
                                      chunk_size=chunk_size)
        hwe_chi2 = hwe_test[:, 0]
        hwe_chi2_distrib, bins = histogram(hwe_chi2, n_bins=50)

        # Plot observed distribution
        axes = fig.add_subplot(gs[i, 0])
        title = 'Chi2 df={} statistic values distribution'.format(df)
        mpl_params = {'set_xlabel': {'args': ['Chi2 statistic'], 'kwargs': {}},
                      'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                      'set_title': {'args': [title], 'kwargs': {}}}
        plot_distrib(hwe_chi2_distrib, bins, axes=axes, mpl_params=mpl_params)

        # Plot expected chi2 distribution on a second y axis so that the
        # density scale does not squash the observed counts.
        axes = axes.twinx()
        rv = chi2(df)
        x = numpy.linspace(0, max(hwe_chi2), 1000)
        axes.plot(x, rv.pdf(x), color='b', lw=2, label='Expected Chi2')
        axes.set_ylabel('Expected Chi2 density')

    # PNG data is binary. The file is opened only once the figure is ready,
    # so a failure above does not leave an empty file behind, and the
    # handle is always closed.
    fpath = join(data_dir, 'hwe_chi2_distrib.png')
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #2
0
def plot_call_field_distribs_per_gt_type(variations, field, max_value,
                                         data_dir, chunk_size=SNPS_PER_CHUNK):
    """Plot the distribution of a call field split by genotype type.

    Two PNG files are written into ``data_dir``:

    * ``<field_name>_distribution_per_sample.png``: box plots per sample,
      one series for heterozygous and one for homozygous calls.
    * ``<field_name>_distribution.png``: for every genotype type, the
      distribution summed over samples and its cumulative distribution
      expressed as a percentage of its first value.

    ``field_name`` is the last ``/``-separated component of ``field``.

    Args:
        variations: variations container; ``variations[GT_FIELD]`` and
            ``variations.samples`` are read here.
        field: path of the call field to analyse.
        max_value: upper limit of the histogram range; also used as the
            number of bins.
        data_dir: directory in which the PNG files are written.
        chunk_size: forwarded to ``calc_field_distribs_per_sample``.
    """
    # Field distribution per sample
    field_name = field.split('/')[-1]
    names = ['Heterozygous', 'Homozygous']
    distribs = []
    for mask_func in [call_is_het, call_is_hom]:
        dp_distribs, bins = calc_field_distribs_per_sample(
            variations, field=field, range_=(0, max_value),
            n_bins=max_value, chunk_size=chunk_size, mask_func=mask_func,
            mask_field=GT_FIELD)
        distribs.append(dp_distribs)

    title = '{} distribution per sample'.format(field_name)
    mpl_params = {'set_xlabel': {'args': ['Samples'], 'kwargs': {}},
                  'set_ylabel': {'args': [field_name], 'kwargs': {}},
                  'set_title': {'args': [title], 'kwargs': {}}}
    # The figure width is the size of the second GT_FIELD axis.
    figsize = (variations[GT_FIELD].shape[1], 7)
    fpath = join(data_dir, '{}_distribution_per_sample.png'.format(field_name))
    # PNG output is binary; the context manager closes the handle.
    with open(fpath, 'wb') as fhand:
        plot_boxplot_from_distribs_series(distribs, fhand=fhand,
                                          mpl_params=mpl_params,
                                          figsize=figsize,
                                          colors=['pink', 'tan'],
                                          labels=names,
                                          xticklabels=variations.samples)

    # Overall field distributions
    fig = Figure(figsize=(20, 15))
    canvas = FigureCanvas(fig)
    n_rows = len(names)
    subplot_idx = 1
    for sample_distribs, name in zip(distribs, names):
        # Sum the distributions of the current genotype type. The previous
        # code summed the leftover ``dp_distribs`` variable, so every row
        # showed the last computed (homozygous) distribution.
        distrib = numpy.sum(sample_distribs, axis=0)
        distrib_cum = calc_cum_distrib(distrib)

        axes = fig.add_subplot(n_rows, 2, subplot_idx)
        subplot_idx += 1
        title = '{} distribution all samples {}'.format(field_name, name)
        plot_distrib(distrib, bins, axes=axes,
                     mpl_params={'set_xlabel': {'args': [field_name],
                                                'kwargs': {}},
                                 'set_ylabel': {'args': ['Number of GTs'],
                                                'kwargs': {}},
                                 'set_title': {'args': [title], 'kwargs': {}}})

        # Express the cumulative distribution as a percentage of its first
        # value.
        distrib_cum = distrib_cum / distrib_cum[0] * 100
        axes = fig.add_subplot(n_rows, 2, subplot_idx)
        subplot_idx += 1
        title = '{} cumulative distribution all samples {}'.format(field_name,
                                                                   name)
        plot_distrib(distrib_cum, bins, axes=axes,
                     mpl_params={'set_xlabel': {'args': [field_name],
                                                'kwargs': {}},
                                 'set_ylabel': {'args': ['% calls > Depth '],
                                                'kwargs': {}},
                                 'set_title': {'args': [title], 'kwargs': {}}})

    fpath = join(data_dir, '{}_distribution.png'.format(field_name))
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #3
0
def plot_allele_obs_distrib_2D(variations, data_dir, max_allele_counts,
                               chunk_size=SNPS_PER_CHUNK):
    """Plot 2D histograms of allele observations for each genotype type.

    A 3x2 grid is drawn, one row per genotype class (heterozygous,
    alt homozygous, ref homozygous). The left column shows the log10 of the
    result of ``hist2d_allele_observations``; the right column shows the
    result of ``hist2d_gq_allele_observations``. The figure is saved as
    ``allele_obs_distrib_per_gt.png`` inside ``data_dir``.

    Args:
        variations: variations container handed to the hist2d helpers.
        data_dir: directory in which the PNG is written.
        max_allele_counts: upper limit of both histogram axes; also used as
            the number of bins.
        chunk_size: forwarded to the hist2d helpers.
    """
    # Allele observation distribution 2D
    masks = [call_is_het, call_is_hom_alt, call_is_hom_ref]
    names = ['Heterozygous', 'Alt Homozygous', 'Ref Homozygous']

    fig = Figure(figsize=(22, 25))
    canvas = FigureCanvas(fig)
    gs = gridspec.GridSpec(3, 2)

    counts_range = [[0, max_allele_counts], [0, max_allele_counts]]
    axis_labels = {'set_xlabel': {'args': ['Alt allele counts'],
                                  'kwargs': {}},
                   'set_ylabel': {'args': ['Ref allele counts'],
                                  'kwargs': {}}}

    for i, (mask_func, name) in enumerate(zip(masks, names)):
        hist2d = hist2d_allele_observations(variations,
                                            n_bins=max_allele_counts,
                                            range_=counts_range,
                                            mask_func=mask_func,
                                            chunk_size=chunk_size)
        counts_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 0])
        title = 'Allele counts distribution 2D {}'.format(name)
        mpl_params = dict(axis_labels)
        mpl_params['set_title'] = {'args': [title], 'kwargs': {}}
        # Counts are shown on a log10 scale.
        plot_hist2d(numpy.log10(counts_distrib2d), xbins, ybins, axes=axes,
                    mpl_params=mpl_params,
                    colorbar_label='log10(counts)', fig=fig)

        hist2d = hist2d_gq_allele_observations(variations,
                                               n_bins=max_allele_counts,
                                               range_=counts_range,
                                               mask_func=mask_func,
                                               chunk_size=chunk_size,
                                               hist_counts=counts_distrib2d)
        gq_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 1])
        title = 'Allele counts GQ distribution 2D {}'.format(name)
        mpl_params = dict(axis_labels)
        mpl_params['set_title'] = {'args': [title], 'kwargs': {}}
        plot_hist2d(gq_distrib2d, xbins, ybins, axes=axes, fig=fig,
                    mpl_params=mpl_params,
                    colorbar_label='Genotype Quality (GQ)')

    # PNG data is binary. The file is opened only once the figure is ready
    # and the handle is always closed.
    fpath = join(data_dir, 'allele_obs_distrib_per_gt.png')
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #4
0
def save_data_as_graph(data,
                       path_to_save="./log/1280_fast_6conv/static/my_img.png",
                       title='None'):
    """Draw ``data`` as a line plot and save it to ``path_to_save``.

    The x axis holds the positions ``0 .. len(data) - 1`` (labelled
    'Train Step') and the y axis holds the values of ``data`` (labelled
    'Loss'). The 5x5 inch figure is rendered at 100 dpi.

    Args:
        data: sized sequence of values to plot.
        path_to_save: destination passed to ``print_figure``.
        title: title shown above the plot.
    """
    step_indices = np.arange(len(data), dtype='int32').tolist()

    figure = Figure(figsize=(5, 5), dpi=100)
    canvas = FigureCanvas(figure)

    plot_axes = figure.add_subplot(111)
    plot_axes.set_title(title)
    plot_axes.set_xlabel('Train Step')
    plot_axes.set_ylabel('Loss')
    plot_axes.plot(step_indices, data)

    canvas.print_figure(path_to_save)
Пример #5
0
def plot_hwe(variations,
             max_num_alleles,
             data_dir,
             ploidy=2,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
             chunk_size=SNPS_PER_CHUNK):
    """Plot observed and expected chi2 distributions of the HWE test.

    One subplot is drawn for every allele number from 2 up to
    ``max_num_alleles`` (inclusive). Each subplot shows a 50-bin histogram
    of the first column of ``calc_hwe_chi2_test``'s result, with the
    theoretical chi2 density overlaid on a twin y axis. The figure is saved
    as ``hwe_chi2_distrib.png`` inside ``data_dir``.

    Args:
        variations: variations container handed to ``calc_hwe_chi2_test``.
        max_num_alleles: largest number of alleles to plot.
        data_dir: directory in which the PNG is written.
        ploidy: ploidy used to count the possible genotypes.
        min_num_genotypes: forwarded to ``calc_hwe_chi2_test``.
        chunk_size: forwarded to ``calc_hwe_chi2_test``.
    """
    fig = Figure(figsize=(10, 20))
    canvas = FigureCanvas(fig)

    num_alleles = range(2, max_num_alleles + 1)
    gs = gridspec.GridSpec(len(num_alleles), 1)
    for i, num_allele in enumerate(num_alleles):
        # Degrees of freedom: number of distinct genotypes that can be built
        # with num_allele alleles minus the number of alleles.
        n_genotypes = sum(
            1 for _ in combinations_with_replacement(range(num_allele),
                                                     ploidy))
        df = n_genotypes - num_allele

        hwe_test = calc_hwe_chi2_test(variations,
                                      num_allele=num_allele,
                                      min_num_genotypes=min_num_genotypes,
                                      chunk_size=chunk_size)
        hwe_chi2 = hwe_test[:, 0]
        hwe_chi2_distrib, bins = histogram(hwe_chi2, n_bins=50)

        # Plot observed distribution
        axes = fig.add_subplot(gs[i, 0])
        title = 'Chi2 df={} statistic values distribution'.format(df)
        mpl_params = {
            'set_xlabel': {'args': ['Chi2 statistic'], 'kwargs': {}},
            'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
        }
        plot_distrib(hwe_chi2_distrib, bins, axes=axes, mpl_params=mpl_params)

        # Plot expected chi2 distribution on a second y axis so that the
        # density scale does not squash the observed counts.
        axes = axes.twinx()
        rv = chi2(df)
        x = numpy.linspace(0, max(hwe_chi2), 1000)
        axes.plot(x, rv.pdf(x), color='b', lw=2, label='Expected Chi2')
        axes.set_ylabel('Expected Chi2 density')

    # PNG data is binary. The file is opened only once the figure is ready,
    # so a failure above does not leave an empty file behind, and the
    # handle is always closed.
    fpath = join(data_dir, 'hwe_chi2_distrib.png')
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #6
0
def plot_obs_het(variations, data_dir, chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot the observed heterozygosity distributions by SNP and by sample.

    Two 25-bin histograms over the range (0, 1) are stacked in one figure:
    the per-SNP distribution on top (log y scale) and the per-sample
    distribution below. The figure is saved as ``obs_het.png`` inside
    ``data_dir``.

    Args:
        variations: variations container handed to the calc helpers.
        data_dir: directory in which the PNG is written.
        chunk_size: forwarded to the chunked calc helpers.
        min_num_genotypes: bound into ``calc_obs_het`` for the per-SNP
            calculation.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations, calc_funct=_calc_obs_het_by_var,
                                   n_bins=25, range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample, n_bins=25,
                                              range_=(0, 1))

    # Plot distributions. Both histograms are drawn on axes of the same
    # figure, so plot_distrib receives only ``axes``. The previous code also
    # passed freshly opened handles to the output file, truncating it
    # several times and leaking every handle.
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    axes = fig.add_subplot(211)
    title = 'SNP observed Heterozygosity distribution'
    plot_distrib(obs_het_var_distrib, bins=bins1,
                 mpl_params={'set_xlabel': {'args': ['Heterozygosity'],
                                            'kwargs': {}},
                             'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                             'set_title': {'args': [title], 'kwargs': {}},
                             'set_yscale': {'args': ['log'], 'kwargs': {}}},
                 axes=axes, color='c')

    axes = fig.add_subplot(212)
    title = 'Sample observed Heterozygosity distribution'
    plot_distrib(obs_het_sample_distrib, bins=bins2,
                 mpl_params={'set_xlabel': {'args': ['Heterozygosity'],
                                            'kwargs': {}},
                             'set_ylabel': {'args': ['Sample number'],
                                            'kwargs': {}},
                             'set_title': {'args': [title], 'kwargs': {}}},
                 axes=axes, color='c')

    # PNG data is binary; the file is opened once and always closed.
    fpath = join(data_dir, 'obs_het.png')
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #7
0
def plot_nucleotide_diversity_measures(variations, max_num_alleles,
                                       window_size, data_dir,
                                       chunk_size=SNPS_PER_CHUNK,
                                       write_bg=False,
                                       min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot windowed nucleotide diversity measures along the genome.

    Three stacked Manhattan plots are drawn and saved as
    ``nucleotide_diversity.png`` inside ``data_dir``: SNP density, the
    Watterson estimator (SNP density divided by the harmonic correction
    factor) and Pi (windowed expected heterozygosity).

    When ``write_bg`` is true, every measure is also written as a bedgraph
    track: ``diversity_s.bg``, ``diversity_watterson.bg`` and
    ``diversity_pi.bg``. The Watterson track used to be written to
    ``diversity_s.bg`` too, clobbering the SNP density track. No bedgraph
    file is created when ``write_bg`` is false.

    Args:
        variations: variations container; ``variations[GT_FIELD]`` is read
            here and the container is handed to the calc helpers.
        max_num_alleles: unused; kept for interface compatibility.
        window_size: size of the windows; it is also used as the step.
        data_dir: directory in which the output files are written.
        chunk_size: forwarded to ``calc_expected_het``.
        write_bg: whether the bedgraph tracks are written.
        min_num_genotypes: forwarded to ``calc_expected_het``.
    """
    fig = Figure(figsize=(20, 20))
    canvas = FigureCanvas(fig)
    marker = 'k'
    chrom = _load_matrix(variations, CHROM_FIELD)
    pos = _load_matrix(variations, POS_FIELD)

    # Number of variable positions per bp
    snp_density = PositionalStatsCalculator(chrom, pos,
                                            numpy.ones(pos.shape),
                                            window_size=window_size,
                                            step=window_size)
    snp_density = snp_density.calc_window_stat()
    if write_bg:
        description = 'SNP density in windows of {} bp'.format(window_size)
        with open(join(data_dir, 'diversity_s.bg'), 'w') as bg_fhand:
            snp_density.write(bg_fhand, 's', description,
                              track_type='bedgraph')
    axes = fig.add_subplot(311)
    title = 'Nucleotide diversity measures averaged in windows of {} bp'
    title = title.format(window_size)
    mpl_params = {'set_title': {'args': [title], 'kwargs': {}},
                  'set_ylabel': {'args': ['SNPs number / bp'], 'kwargs': {}},
                  'set_ylim': {'args': [0, 1.2 * numpy.max(snp_density.stat)],
                               'kwargs': {}}}
    manhattan_plot(snp_density.chrom, snp_density.pos, snp_density.stat,
                   mpl_params=mpl_params, axes=axes, ylim=0, show_chroms=False,
                   marker=marker)

    # Watterson estimator of nucleotide diversity
    n_seqs = variations[GT_FIELD].shape[1] * variations[GT_FIELD].shape[2]
    # 1.0 keeps the harmonic sum in floating point even under integer
    # division semantics.
    correction_factor = numpy.sum(1.0 / numpy.arange(1, n_seqs))
    # The SNP density object is reused: it has already been plotted and
    # written, and the assignment below binds a new stat array to it.
    watterson = snp_density
    watterson.stat = watterson.stat / correction_factor
    if write_bg:
        description = 'Watterson estimator in windows of {} bp'
        description = description.format(window_size)
        # NOTE(review): the second argument is still 's' as in the original
        # call; confirm whether write() expects a distinct value per track.
        with open(join(data_dir, 'diversity_watterson.bg'), 'w') as bg_fhand:
            watterson.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(312)
    mpl_params = {'set_ylabel': {'args': ['Watterson estimator'],
                                 'kwargs': {}},
                  'set_ylim': {'args': [0, 1.2 * numpy.max(watterson.stat)],
                               'kwargs': {}}}
    manhattan_plot(watterson.chrom, watterson.pos, watterson.stat,
                   mpl_params=mpl_params, axes=axes, ylim=0, show_chroms=False,
                   marker=marker)

    # Expected heterozygosity (Pi)
    exp_het = calc_expected_het(variations, chunk_size=chunk_size,
                                min_num_genotypes=min_num_genotypes)
    pi = PositionalStatsCalculator(chrom, pos, exp_het,
                                   window_size=window_size, step=window_size)
    pi = pi.calc_window_stat()
    if write_bg:
        description = 'Pi in windows of {} bp'.format(window_size)
        with open(join(data_dir, 'diversity_pi.bg'), 'w') as bg_fhand:
            pi.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(313)
    mpl_params = {'set_xlabel': {'args': ['Chromosome'], 'kwargs': {}},
                  'set_ylabel': {'args': ['Pi'], 'kwargs': {}},
                  'set_ylim': {'args': [0, 1.2 * numpy.max(pi.stat)],
                               'kwargs': {}}}
    manhattan_plot(pi.chrom, pi.pos, pi.stat, axes=axes, ylim=0, marker=marker,
                   mpl_params=mpl_params)

    # PNG data is binary; the context manager closes the handle.
    with open(join(data_dir, 'nucleotide_diversity.png'), 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #8
0
def plot_nucleotide_diversity_measures(
        variations,
        max_num_alleles,
        window_size,
        data_dir,
        chunk_size=SNPS_PER_CHUNK,
        write_bg=False,
        min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot windowed nucleotide diversity measures along the genome.

    Three stacked Manhattan plots are drawn and saved as
    ``nucleotide_diversity.png`` inside ``data_dir``: SNP density, the
    Watterson estimator (SNP density divided by the harmonic correction
    factor) and Pi (windowed expected heterozygosity).

    When ``write_bg`` is true, every measure is also written as a bedgraph
    track: ``diversity_s.bg``, ``diversity_watterson.bg`` and
    ``diversity_pi.bg``. The Watterson track used to be written to
    ``diversity_s.bg`` too, clobbering the SNP density track. No bedgraph
    file is created when ``write_bg`` is false.

    Args:
        variations: variations container; ``variations[GT_FIELD]`` is read
            here and the container is handed to the calc helpers.
        max_num_alleles: unused; kept for interface compatibility.
        window_size: size of the windows; it is also used as the step.
        data_dir: directory in which the output files are written.
        chunk_size: forwarded to ``calc_expected_het``.
        write_bg: whether the bedgraph tracks are written.
        min_num_genotypes: forwarded to ``calc_expected_het``.
    """
    fig = Figure(figsize=(20, 20))
    canvas = FigureCanvas(fig)
    marker = 'k'
    chrom = _load_matrix(variations, CHROM_FIELD)
    pos = _load_matrix(variations, POS_FIELD)

    # Number of variable positions per bp
    snp_density = PositionalStatsCalculator(chrom,
                                            pos,
                                            numpy.ones(pos.shape),
                                            window_size=window_size,
                                            step=window_size)
    snp_density = snp_density.calc_window_stat()
    if write_bg:
        description = 'SNP density in windows of {} bp'.format(window_size)
        with open(join(data_dir, 'diversity_s.bg'), 'w') as bg_fhand:
            snp_density.write(bg_fhand,
                              's',
                              description,
                              track_type='bedgraph')
    axes = fig.add_subplot(311)
    title = 'Nucleotide diversity measures averaged in windows of {} bp'
    title = title.format(window_size)
    mpl_params = {
        'set_title': {'args': [title], 'kwargs': {}},
        'set_ylabel': {'args': ['SNPs number / bp'], 'kwargs': {}},
        'set_ylim': {
            'args': [0, 1.2 * numpy.max(snp_density.stat)],
            'kwargs': {}
        },
    }
    manhattan_plot(snp_density.chrom,
                   snp_density.pos,
                   snp_density.stat,
                   mpl_params=mpl_params,
                   axes=axes,
                   ylim=0,
                   show_chroms=False,
                   marker=marker)

    # Watterson estimator of nucleotide diversity
    n_seqs = variations[GT_FIELD].shape[1] * variations[GT_FIELD].shape[2]
    # 1.0 keeps the harmonic sum in floating point even under integer
    # division semantics.
    correction_factor = numpy.sum(1.0 / numpy.arange(1, n_seqs))
    # The SNP density object is reused: it has already been plotted and
    # written, and the assignment below binds a new stat array to it.
    watterson = snp_density
    watterson.stat = watterson.stat / correction_factor
    if write_bg:
        description = 'Watterson estimator in windows of {} bp'
        description = description.format(window_size)
        # NOTE(review): the second argument is still 's' as in the original
        # call; confirm whether write() expects a distinct value per track.
        with open(join(data_dir, 'diversity_watterson.bg'), 'w') as bg_fhand:
            watterson.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(312)
    mpl_params = {
        'set_ylabel': {'args': ['Watterson estimator'], 'kwargs': {}},
        'set_ylim': {
            'args': [0, 1.2 * numpy.max(watterson.stat)],
            'kwargs': {}
        },
    }
    manhattan_plot(watterson.chrom,
                   watterson.pos,
                   watterson.stat,
                   mpl_params=mpl_params,
                   axes=axes,
                   ylim=0,
                   show_chroms=False,
                   marker=marker)

    # Expected heterozygosity (Pi)
    exp_het = calc_expected_het(variations,
                                chunk_size=chunk_size,
                                min_num_genotypes=min_num_genotypes)
    pi = PositionalStatsCalculator(chrom,
                                   pos,
                                   exp_het,
                                   window_size=window_size,
                                   step=window_size)
    pi = pi.calc_window_stat()
    if write_bg:
        description = 'Pi in windows of {} bp'.format(window_size)
        with open(join(data_dir, 'diversity_pi.bg'), 'w') as bg_fhand:
            pi.write(bg_fhand, 's', description, track_type='bedgraph')
    axes = fig.add_subplot(313)
    mpl_params = {
        'set_xlabel': {'args': ['Chromosome'], 'kwargs': {}},
        'set_ylabel': {'args': ['Pi'], 'kwargs': {}},
        'set_ylim': {
            'args': [0, 1.2 * numpy.max(pi.stat)],
            'kwargs': {}
        },
    }
    manhattan_plot(pi.chrom,
                   pi.pos,
                   pi.stat,
                   axes=axes,
                   ylim=0,
                   marker=marker,
                   mpl_params=mpl_params)

    # PNG data is binary; the context manager closes the handle.
    with open(join(data_dir, 'nucleotide_diversity.png'), 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #9
0
def plot_allele_obs_distrib_2D(variations,
                               data_dir,
                               max_allele_counts,
                               chunk_size=SNPS_PER_CHUNK):
    """Plot 2D histograms of allele observations for each genotype type.

    A 3x2 grid is drawn, one row per genotype class (heterozygous,
    alt homozygous, ref homozygous). The left column shows the log10 of the
    result of ``hist2d_allele_observations``; the right column shows the
    result of ``hist2d_gq_allele_observations``. The figure is saved as
    ``allele_obs_distrib_per_gt.png`` inside ``data_dir``.

    Args:
        variations: variations container handed to the hist2d helpers.
        data_dir: directory in which the PNG is written.
        max_allele_counts: upper limit of both histogram axes; also used as
            the number of bins.
        chunk_size: forwarded to the hist2d helpers.
    """
    # Allele observation distribution 2D
    masks = [call_is_het, call_is_hom_alt, call_is_hom_ref]
    names = ['Heterozygous', 'Alt Homozygous', 'Ref Homozygous']

    fig = Figure(figsize=(22, 25))
    canvas = FigureCanvas(fig)
    gs = gridspec.GridSpec(3, 2)

    counts_range = [[0, max_allele_counts], [0, max_allele_counts]]
    axis_labels = {
        'set_xlabel': {'args': ['Alt allele counts'], 'kwargs': {}},
        'set_ylabel': {'args': ['Ref allele counts'], 'kwargs': {}},
    }

    for i, (mask_func, name) in enumerate(zip(masks, names)):
        hist2d = hist2d_allele_observations(variations,
                                            n_bins=max_allele_counts,
                                            range_=counts_range,
                                            mask_func=mask_func,
                                            chunk_size=chunk_size)
        counts_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 0])
        title = 'Allele counts distribution 2D {}'.format(name)
        mpl_params = dict(axis_labels)
        mpl_params['set_title'] = {'args': [title], 'kwargs': {}}
        # Counts are shown on a log10 scale.
        plot_hist2d(numpy.log10(counts_distrib2d),
                    xbins,
                    ybins,
                    axes=axes,
                    mpl_params=mpl_params,
                    colorbar_label='log10(counts)',
                    fig=fig)

        hist2d = hist2d_gq_allele_observations(variations,
                                               n_bins=max_allele_counts,
                                               range_=counts_range,
                                               mask_func=mask_func,
                                               chunk_size=chunk_size,
                                               hist_counts=counts_distrib2d)
        gq_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 1])
        title = 'Allele counts GQ distribution 2D {}'.format(name)
        mpl_params = dict(axis_labels)
        mpl_params['set_title'] = {'args': [title], 'kwargs': {}}
        plot_hist2d(gq_distrib2d,
                    xbins,
                    ybins,
                    axes=axes,
                    fig=fig,
                    mpl_params=mpl_params,
                    colorbar_label='Genotype Quality (GQ)')

    # PNG data is binary. The file is opened only once the figure is ready
    # and the handle is always closed.
    fpath = join(data_dir, 'allele_obs_distrib_per_gt.png')
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #10
0
def plot_call_field_distribs_per_gt_type(variations,
                                         field,
                                         max_value,
                                         data_dir,
                                         chunk_size=SNPS_PER_CHUNK):
    """Plot the distribution of a call field split by genotype type.

    Two PNG files are written into ``data_dir``:

    * ``<field_name>_distribution_per_sample.png``: box plots per sample,
      one series for heterozygous and one for homozygous calls.
    * ``<field_name>_distribution.png``: for every genotype type, the
      distribution summed over samples and its cumulative distribution
      expressed as a percentage of its first value.

    ``field_name`` is the last ``/``-separated component of ``field``.

    Args:
        variations: variations container; ``variations[GT_FIELD]`` and
            ``variations.samples`` are read here.
        field: path of the call field to analyse.
        max_value: upper limit of the histogram range; also used as the
            number of bins.
        data_dir: directory in which the PNG files are written.
        chunk_size: forwarded to ``calc_field_distribs_per_sample``.
    """
    # Field distribution per sample
    field_name = field.split('/')[-1]
    names = ['Heterozygous', 'Homozygous']
    distribs = []
    for mask_func in [call_is_het, call_is_hom]:
        dp_distribs, bins = calc_field_distribs_per_sample(
            variations,
            field=field,
            range_=(0, max_value),
            n_bins=max_value,
            chunk_size=chunk_size,
            mask_func=mask_func,
            mask_field=GT_FIELD)
        distribs.append(dp_distribs)

    title = '{} distribution per sample'.format(field_name)
    mpl_params = {
        'set_xlabel': {'args': ['Samples'], 'kwargs': {}},
        'set_ylabel': {'args': [field_name], 'kwargs': {}},
        'set_title': {'args': [title], 'kwargs': {}},
    }
    # The figure width is the size of the second GT_FIELD axis.
    figsize = (variations[GT_FIELD].shape[1], 7)
    fpath = join(data_dir, '{}_distribution_per_sample.png'.format(field_name))
    # PNG output is binary; the context manager closes the handle.
    with open(fpath, 'wb') as fhand:
        plot_boxplot_from_distribs_series(distribs,
                                          fhand=fhand,
                                          mpl_params=mpl_params,
                                          figsize=figsize,
                                          colors=['pink', 'tan'],
                                          labels=names,
                                          xticklabels=variations.samples)

    # Overall field distributions
    fig = Figure(figsize=(20, 15))
    canvas = FigureCanvas(fig)
    n_rows = len(names)
    subplot_idx = 1
    for sample_distribs, name in zip(distribs, names):
        # Sum the distributions of the current genotype type. The previous
        # code summed the leftover ``dp_distribs`` variable, so every row
        # showed the last computed (homozygous) distribution.
        distrib = numpy.sum(sample_distribs, axis=0)
        distrib_cum = calc_cum_distrib(distrib)

        axes = fig.add_subplot(n_rows, 2, subplot_idx)
        subplot_idx += 1
        title = '{} distribution all samples {}'.format(field_name, name)
        plot_distrib(distrib,
                     bins,
                     axes=axes,
                     mpl_params={
                         'set_xlabel': {'args': [field_name], 'kwargs': {}},
                         'set_ylabel': {'args': ['Number of GTs'],
                                        'kwargs': {}},
                         'set_title': {'args': [title], 'kwargs': {}},
                     })

        # Express the cumulative distribution as a percentage of its first
        # value.
        distrib_cum = distrib_cum / distrib_cum[0] * 100
        axes = fig.add_subplot(n_rows, 2, subplot_idx)
        subplot_idx += 1
        title = '{} cumulative distribution all samples {}'.format(
            field_name, name)
        plot_distrib(distrib_cum,
                     bins,
                     axes=axes,
                     mpl_params={
                         'set_xlabel': {'args': [field_name], 'kwargs': {}},
                         'set_ylabel': {'args': ['% calls > Depth '],
                                        'kwargs': {}},
                         'set_title': {'args': [title], 'kwargs': {}},
                     })

    fpath = join(data_dir, '{}_distribution.png'.format(field_name))
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #11
0
def plot_obs_het(variations,
                 data_dir,
                 chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot the observed heterozygosity distributions by SNP and by sample.

    Two 25-bin histograms over the range (0, 1) are stacked in one figure:
    the per-SNP distribution on top (log y scale) and the per-sample
    distribution below. The figure is saved as ``obs_het.png`` inside
    ``data_dir``.

    Args:
        variations: variations container handed to the calc helpers.
        data_dir: directory in which the PNG is written.
        chunk_size: forwarded to the chunked calc helpers.
        min_num_genotypes: bound into ``calc_obs_het`` for the per-SNP
            calculation.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations,
                                   calc_funct=_calc_obs_het_by_var,
                                   n_bins=25,
                                   range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample,
                                              n_bins=25,
                                              range_=(0, 1))

    # Plot distributions. Both histograms are drawn on axes of the same
    # figure, so plot_distrib receives only ``axes``. The previous code also
    # passed freshly opened handles to the output file, truncating it
    # several times and leaking every handle.
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    axes = fig.add_subplot(211)
    title = 'SNP observed Heterozygosity distribution'
    plot_distrib(obs_het_var_distrib,
                 bins=bins1,
                 mpl_params={
                     'set_xlabel': {'args': ['Heterozygosity'], 'kwargs': {}},
                     'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                     'set_title': {'args': [title], 'kwargs': {}},
                     'set_yscale': {'args': ['log'], 'kwargs': {}},
                 },
                 axes=axes,
                 color='c')

    axes = fig.add_subplot(212)
    title = 'Sample observed Heterozygosity distribution'
    plot_distrib(obs_het_sample_distrib,
                 bins=bins2,
                 mpl_params={
                     'set_xlabel': {'args': ['Heterozygosity'], 'kwargs': {}},
                     'set_ylabel': {'args': ['Sample number'], 'kwargs': {}},
                     'set_title': {'args': [title], 'kwargs': {}},
                 },
                 axes=axes,
                 color='c')

    # PNG data is binary; the file is opened once and always closed.
    fpath = join(data_dir, 'obs_het.png')
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Пример #12
0
class zorroPlot(object):
    def __init__(self,
                 filename=None,
                 width=7,
                 height=7,
                 dpi=144,
                 facecolor=[0.75, 0.75, 0.75, 1.0],
                 MplCanvas=None,
                 backend=u'Qt4Agg'):
        """
        Object-oriented plotting interface for Zorro.

        Creates a matplotlib Figure with a single full-frame axes, records the
        plot settings in ``self.plotDict`` and attaches a canvas.

        Parameters
        ----------
        filename : optional
            Not used yet; a truthy value only prints a TODO notice.
        width, height :
            Figure size, passed to ``Figure(figsize=(width, height))``.
        dpi :
            Figure resolution; also stored in ``plotDict['dpi']``.
        facecolor :
            Figure background colour.  A list is copied before being stored.
        MplCanvas : optional
            Existing canvas object to use as-is instead of creating one.
        backend : str
            Case-insensitive backend name: 'agg', 'qt4'/'qt4agg' or
            'qt5'/'qt5agg'.  Any other value falls back to qt4agg.
        """
        # The default colour is a list literal shared by every call; copy it so
        # that editing plotDict['facecolor'] on one instance cannot leak into
        # the default (or into a list owned by the caller).
        if isinstance(facecolor, list):
            facecolor = list(facecolor)

        # All parameters are stored in a hash-dictionary
        self.plotDict = {}
        self.plotDict[u'width'] = width
        self.plotDict[u'height'] = height
        self.plotDict[u'dpi'] = dpi
        self.plotDict[u'facecolor'] = facecolor

        if filename:
            print("TODO: load and display file from zorroPlot.__init__()")

        # http://stackoverflow.com/questions/13714454/specifying-and-saving-a-figure-with-exact-size-in-pixels
        self.fig = matplotlib.figure.Figure(figsize=(width, height),
                                            facecolor=facecolor,
                                            dpi=dpi)
        # This forces the plot window to cover the entire space by default
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        # We want the axes cleared every time plot() is called.
        # NOTE(review): Axes.hold only exists in older matplotlib releases
        # (it was removed in 3.0) - confirm the supported version.
        self.axes.hold(False)
        self.axes2 = None

        self.cmaps_cycle = itertools.cycle(
            [u"gray", u"gnuplot", u"jet", u"nipy_spectral"])
        # Pre-cycle once...
        self.plotDict[u'image_cmap'] = next(self.cmaps_cycle)
        self.plotDict[u'graph_cmap'] = u"gnuplot"
        # Try to load imageSum_boxMask.png as an overlay
        self.plotDict[u'showBoxes'] = False
        self.plotDict[u'colorbar'] = True

        if MplCanvas:
            # Avoid calling anything that would require importing PySide here, as we don't want it as an
            # explicit dependency.
            self.canvas = MplCanvas
            return

        backendName = backend.lower()
        if backendName == u'agg':  # CANNOT RENDER TO SCREEN, PRINTING ONLY
            from matplotlib.backends.backend_agg import FigureCanvas
        elif backendName in (u'qt5', u'qt5agg'):
            from matplotlib.backends.backend_qt5agg import FigureCanvas
        else:  # 'qt4', 'qt4agg' and every unrecognised name: default is qt4agg
            from matplotlib.backends.backend_qt4agg import FigureCanvas

        self.canvas = FigureCanvas(self.fig)

        try:
            self.canvas.updateGeometry()
        except Exception:
            # Best-effort refresh: not every canvas type offers
            # updateGeometry().  KeyboardInterrupt/SystemExit still propagate.
            pass

    def updateCanvas(self):
        """
        Updates a (Qt4Agg) FigureCanvas.  Typically an automator.MplCanvas type.
        """
        try:
            self.canvas.updateGeometry()
        except:
            pass
        #self.canvas.draw() # Necessary with show?
        self.canvas.show()

    def printAndReturn(self):
        """
        Any following commands shared amongst all plot functions go here for brevity.
        """
        if 'title' in self.plotDict:
            self.axes.set_title(self.plotDict['title'])

        try:
            self.canvas.updateGeometry()
        except:
            pass
        if u'plotFile' in self.plotDict and bool(self.plotDict['plotFile']):
            if self.plotDict[u'Transparent']:
                color = [0, 0, 0, 0]
            else:
                color = [1, 1, 1, 1]

            self.canvas.print_figure(self.plotDict[u'plotFile'],
                                     dpi=self.plotDict[u'dpi'],
                                     facecolor=color,
                                     edgecolor=color)
            return self.plotDict[u'plotFile']

    def plotEmpty(self):
        """
        Draw a placeholder: two crossing diagonals with a "No data" caption
        on a fresh, axis-free, full-frame axes.
        """
        self.fig.clear()
        frame = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        self.axes = frame

        # First diagonal with hold off, then hold on so the second diagonal
        # and the caption are overlaid rather than replacing it.
        frame.hold(False)
        frame.plot([0.0, 1.0], [0.0, 1.0], 'k-')
        frame.hold(True)
        frame.plot([0.0, 1.0], [1.0, 0.0], 'k-')
        frame.text(0.45, 0.25, "No data", fontsize=18)

        frame.hold(False)
        frame.set_axis_off()

    def plotPixmap(self):
        """
        Show ``plotDict['pixmap']`` full-frame, overlaid with
        ``plotDict['boxMask']`` when that mask has any non-zero entry.

        Prints "No pixmap" and draws nothing if no pixmap is stored.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        self.axes.hold(True)

        if u'pixmap' not in self.plotDict:
            print("No pixmap")
            self.axes.hold(False)
            return

        shown = self.axes.imshow(self.plotDict[u'pixmap'],
                                 interpolation='sinc')
        self.axes.set_axis_off()

        hasBoxes = (u'boxMask' in self.plotDict
                    and np.any(self.plotDict[u'boxMask']))
        if hasBoxes:
            print("pixmap boxes")
            # Stretch the mask over the same extent as the pixmap.
            self.axes.imshow(self.plotDict[u'boxMask'],
                             extent=shown.get_extent())

        self.axes.hold(False)

    def plotImage(self):
        """
        Render ``plotDict['image']`` with colour limits taken from
        ``zorro.util.histClim`` and the colormap in ``plotDict['image_cmap']``.

        A scale bar is added when ``plotDict['pixelsize']`` exists and a
        colorbar when ``plotDict['colorbar']`` is truthy.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])

        limits = zorro.util.histClim(self.plotDict['image'], cutoff=1E-4)
        self.axes.hold(True)
        shown = self.axes.imshow(self.plotDict['image'],
                                 vmin=limits[0],
                                 vmax=limits[1],
                                 interpolation='nearest',
                                 cmap=self.plotDict['image_cmap'])

        if 'pixelsize' in self.plotDict:
            zorro.util.plotScalebar(shown, self.plotDict['pixelsize'])
        if self.plotDict['colorbar']:
            self.fig.colorbar(shown, fraction=0.046, pad=0.04)

        self.axes.set_axis_off()
        self.axes.hold(False)
        return self.printAndReturn()

    def plotStack(self):
        print("TODO: implement plotStack")

    def plotFFT(self):
        """
        Plot log10(1 + |FFT|) of ``plotDict['image']``, with the spectrum
        shifted by ``np.fft.fftshift`` and colour limits from
        ``zorro.util.ciClim``.

        A scale bar is added when ``plotDict['pixelsize']`` exists and a
        colorbar when ``plotDict['colorbar']`` is truthy.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        self.axes.hold(False)

        spectrum = np.fft.fftshift(np.fft.fft2(self.plotDict['image']))
        magnitude = np.abs(spectrum)
        FFTimage = np.log10(1.0 + magnitude)

        limits = zorro.util.ciClim(FFTimage, sigma=1.5)
        shown = self.axes.imshow(FFTimage,
                                 interpolation='bicubic',
                                 vmin=limits[0],
                                 vmax=limits[1],
                                 cmap=self.plotDict['image_cmap'])

        if 'pixelsize' in self.plotDict:
            # Reciprocal-space step handed to the scale bar.
            inv_ps = 1.0 / (FFTimage.shape[0] * self.plotDict['pixelsize'])
            zorro.util.plotScalebar(shown, inv_ps, units=u'nm^{-1}')

        self.axes.set_axis_off()
        if self.plotDict['colorbar']:
            self.fig.colorbar(shown, fraction=0.046, pad=0.04)
        return self.printAndReturn()

    def plotPolarFFT(self):
        """
        Plot log10(1 + |FFT|) of ``plotDict['image']`` after passing it
        through ``zorro.util.img2polar``, with colour limits from
        ``zorro.util.ciClim``.

        A scale bar is added when ``plotDict['pixelsize']`` exists and a
        colorbar when ``plotDict['colorbar']`` is truthy.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        self.axes.hold(False)

        polarFFTimage = zorro.util.img2polar(
            np.log10(
                1.0 +
                np.abs(np.fft.fftshift(np.fft.fft2(self.plotDict['image'])))))
        FFTclim = zorro.util.ciClim(polarFFTimage, sigma=1.5)
        mage = self.axes.imshow(polarFFTimage,
                                interpolation='bicubic',
                                vmin=FFTclim[0],
                                vmax=FFTclim[1],
                                cmap=self.plotDict['image_cmap'])
        # Guard on the same key that is read below; testing a misspelt key
        # would silently disable the scale bar.
        if 'pixelsize' in self.plotDict:
            # Egh, this scalebar is sort of wrong, maybe I should transpose the plot?
            inv_ps = 1.0 / (polarFFTimage.shape[0] *
                            self.plotDict['pixelsize'])
            zorro.util.plotScalebar(mage, inv_ps, units=u'nm^{-1}')
        self.axes.set_axis_off()
        if bool(self.plotDict['colorbar']):
            self.fig.colorbar(mage, fraction=0.046, pad=0.04)

        return self.printAndReturn()

    # TODO: render Gautomatch outputs?  Maybe I should make the Gautomatch boxes separately as a largely
    # transparent plot, and just add it on top or not?

    def plotCorrTriMat(self):
        """
        Show ``plotDict['corrTriMat']`` as an image using the colormap in
        ``plotDict['graph_cmap']``.

        Colour limits come from the strictly positive entries only: 0.75 times
        their minimum up to their maximum.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        self.axes.hold(False)

        matrix = self.plotDict['corrTriMat']
        positive = matrix[matrix > 0.0]
        lower = np.min(positive) * 0.75
        upper = np.max(positive)

        shown = self.axes.imshow(matrix,
                                 interpolation='nearest',
                                 vmin=lower,
                                 vmax=upper,
                                 cmap=self.plotDict['graph_cmap'])
        self.axes.set_xlabel("Base image")
        self.axes.set_ylabel("Template image")
        if self.plotDict['colorbar']:
            self.fig.colorbar(shown, fraction=0.046, pad=0.04)

        return self.printAndReturn()

    def plotPeaksigTriMat(self):
        """
        Show ``plotDict['peaksigTriMat']`` as an image using the colormap in
        ``plotDict['graph_cmap']``.

        Colour limits come from the strictly positive entries only: 0.75 times
        their minimum up to their maximum.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        self.axes.hold(False)

        matrix = self.plotDict['peaksigTriMat']
        positive = matrix[matrix > 0.0]
        lower = np.min(positive) * 0.75
        upper = np.max(positive)

        shown = self.axes.imshow(matrix,
                                 interpolation='nearest',
                                 vmin=lower,
                                 vmax=upper,
                                 cmap=self.plotDict['graph_cmap'])
        self.axes.set_xlabel("Base image")
        self.axes.set_ylabel("Template image")
        if self.plotDict['colorbar']:
            self.fig.colorbar(shown, fraction=0.046, pad=0.04)

        return self.printAndReturn()

    def plotTranslations(self):
        """
        Plot the drift track in ``plotDict['translations']``: column 1 on the
        x-axis against column 0 on the y-axis.

        When ``plotDict['errorX']`` exists the track is drawn with error bars
        (using 'errorX' and 'errorY'); otherwise as a plain marked line.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        # rect is [left,bottom,width,height]
        self.axes = self.fig.add_axes([0.12, 0.1, 0.85, 0.85])
        self.axes.hold(False)

        shifts = self.plotDict['translations']
        driftX = shifts[:, 1]
        driftY = shifts[:, 0]

        if 'errorX' not in self.plotDict:
            self.axes.plot(driftX, driftY, 'k.-', linewidth=2.0, markersize=16)
        else:
            self.axes.errorbar(driftX,
                               driftY,
                               fmt='k-',
                               xerr=self.plotDict['errorX'],
                               yerr=self.plotDict['errorY'])

        self.axes.set_xlabel('X-axis drift (pix)')
        self.axes.set_ylabel('Y-axis drift (pix)')
        self.axes.axis('equal')

        return self.printAndReturn()

    def plotPixRegError(self):
        """
        Plot the absolute values of ``plotDict['errorXY']`` on two stacked
        semilog-y panels: column 1 (X) on top, column 0 (Y) below.

        Each legend entry reports the mean and standard deviation of the
        plotted values.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        self.axes = self.fig.add_subplot(211)
        self.axes.hold(False)
        self.axes2 = self.fig.add_subplot(212)
        self.axes2.hold(False)

        errorX = np.abs(self.plotDict['errorXY'][:, 1])
        errorY = np.abs(self.plotDict['errorXY'][:, 0])

        # (panel, data, marker face colour, legend template, y-axis label)
        panels = (
            (self.axes, errorX, 'darkslateblue',
             'X: %.3f +/- %.3f pix', "X-error estimate (pix)"),
            (self.axes2, errorY, 'darkolivegreen',
             'Y: %.3f +/- %.3f pix', "Y-error estimate (pix)"),
        )
        for panel, values, faceColor, template, yLabel in panels:
            panel.semilogy(values,
                           '.:',
                           linewidth=1.5,
                           color='black',
                           markersize=12,
                           markerfacecolor=faceColor,
                           label=template % (np.mean(values), np.std(values)))
            panel.legend(fontsize=12, loc='best')
            panel.set_ylabel(yLabel)

        # Only the bottom panel carries the shared x-axis label.
        self.axes2.set_xlabel("Equation number")

        return self.printAndReturn()

    def plotLogisticWeights(self):
        """
        Scatter the pixel registration error against the correlation peak
        significance on a semilog-y axes.

        The error is the Euclidean norm of the two ``plotDict['errorXY']``
        columns; the x values are ``plotDict['peaksigVect']``.  When
        ``plotDict['peaksigThres']`` exists, a twin axes additionally shows the
        threshold line, the curve from ``zorro.util.logistic`` (using
        'logisticK' and 'logisticNu') and, if 'cdfPeaks' exists, the
        'hSigma'/'cdfPeaks' points.

        Returns whatever printAndReturn() returns.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.12, 0.1, 0.80, 0.85])
        self.axes.hold(False)
        # Forget any secondary axes left over from an earlier plot: after
        # fig.clear() it no longer belongs to the figure, and the legend logic
        # below must not pick it up.
        self.axes2 = None

        pixError = np.sqrt(self.plotDict['errorXY'][:, 0]**2 +
                           self.plotDict['errorXY'][:, 1]**2)
        peaksigVect = self.plotDict['peaksigVect']

        # Mixing a log-plot with a linear-plot in a plotyy style.
        self.axes.semilogy(peaksigVect, pixError, 'k.')
        self.axes.set_xlabel(r'Correlation peak significance, $\sigma$')
        self.axes.set_ylabel('Pixel registration error')
        self.axes.set_ylim([1E-2, 1E2])
        self.axes.set_xlim(peaksigVect.min(), peaksigVect.max())

        if 'peaksigThres' in self.plotDict:
            peaksigThres = self.plotDict['peaksigThres']

            # Twinx not working with custom sizes?
            self.axes2 = self.axes.twinx()
            self.fig.add_axes(self.axes2)
            # Plot threshold sigma value
            self.axes2.plot([peaksigThres, peaksigThres], [0.0, 1.0],
                            '--',
                            color='firebrick',
                            label=r'$\sigma_{thres} = %.2f$' % peaksigThres)

            # Plot the logistics curve
            peakSig = np.arange(np.min(peaksigVect), np.max(peaksigVect), 0.05)

            weights = zorro.util.logistic(peakSig, peaksigThres,
                                          self.plotDict['logisticK'],
                                          self.plotDict['logisticNu'])
            self.axes2.plot(
                peakSig,
                weights,
                label=r"Weights $K=%.2f$, $\nu=%.3f$" %
                (self.plotDict['logisticK'], self.plotDict['logisticNu']),
                color='royalblue')

            if 'cdfPeaks' in self.plotDict:
                self.axes2.plot(self.plotDict['hSigma'],
                                self.plotDict['cdfPeaks'],
                                '+',
                                label=r'$\sigma-$CDF',
                                color='slateblue')

        # One combined legend, placed on the twin axes when there is one.
        lines1, labels1 = self.axes.get_legend_handles_labels()
        if self.axes2 is not None:
            lines2, labels2 = self.axes2.get_legend_handles_labels()
            self.axes2.legend(lines1 + lines2,
                              labels1 + labels2,
                              loc='best',
                              fontsize=14)
        else:
            self.axes.legend(lines1, labels1, loc='best', fontsize=14)

        return self.printAndReturn()

    def plotFRC(self):
        """
        Plot ``plotDict['FRC']`` against spatial frequency.

        The frequency step is ``1 / (2 * FRC.size * plotDict['pixelsize'])``
        and the curve is labelled with ``plotDict['labelText']``.  If the FRC
        has no non-zero entry a warning is printed and None is returned.

        Returns whatever printAndReturn() returns otherwise.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.12, 0.1, 0.85, 0.85])
        self.axes.hold(False)

        FRC = self.plotDict['FRC']
        if not np.any(FRC):
            print("Warning, zorro_plotting: FRC is empty")
            return

        inv_ps = 1.0 / (2.0 * FRC.size * self.plotDict['pixelsize'])
        freqAxis = np.arange(FRC.size) * inv_ps

        # TODO: the curve is plotted raw; a spline fit or a fit to a defocus
        # OTF has been considered but is not implemented.
        curveStyle = dict(color='firebrick',
                          marker='.',
                          markerfacecolor='k',
                          markeredgecolor='k',
                          label=self.plotDict['labelText'])

        self.axes.hold(True)
        self.axes.plot(freqAxis, FRC, **curveStyle)
        self.axes.set_xlabel(r"Spatial frequency, $q$ ($nm^{-1}$)")
        self.axes.set_xlim([freqAxis.min(), freqAxis.max()])
        self.axes.set_ylabel("Fourier ring correlation")
        self.axes.legend(loc='best')
        self.axes.hold(False)

        return self.printAndReturn()

    def plotCTFDiag(self):
        """
        Show ``plotDict['CTFDiag']`` with an anchored text box listing the
        fit values from ``plotDict['CTFInfo']`` (DefocusU, DefocusV,
        DefocusAngle, CtfFigureOfMerit, FinalResolution).

        A scale bar is added when ``plotDict['pixelsize']`` exists.  If the
        image cannot be rendered a warning is printed and None is returned.

        Returns whatever printAndReturn() returns otherwise.
        """
        self.fig.clear()
        self.axes = self.fig.add_axes([0.0, 0.0, 1.0, 1.0])
        self.axes.hold(False)

        CTFInfo = self.plotDict['CTFInfo']
        try:
            mapCTF = self.axes.imshow(self.plotDict['CTFDiag'],
                                      cmap=self.plotDict['image_cmap'])
        except Exception:
            # Rendering is best-effort, but KeyboardInterrupt/SystemExit must
            # not be swallowed the way a bare ``except`` would.
            print(
                "WARNING: Could not render CTF Diagnostic image, TODO: switch to disk version"
            )
            # Try the dead version instead?  I need checks in the plotting functions to see if the data
            # exists and if not nicely switch to live/dead
            return

        if 'pixelsize' in self.plotDict:
            inv_ps = 1.0 / (self.plotDict['CTFDiag'].shape[0] *
                            self.plotDict['pixelsize'])
            zorro.util.plotScalebar(mapCTF, inv_ps, units=u'nm^{-1}')

        if 'title' in self.plotDict:
            self.title = self.plotDict['title']

        # Mathtext needs literal backslashes; they are escaped explicitly
        # because these strings also contain a real "\n" and so cannot be raw.
        results = (u"$DF_1:\\/%.1f\\/\\AA$\n" % CTFInfo['DefocusU'] +
                   u"$DF_2:\\/%.1f\\/\\AA$\n" % CTFInfo['DefocusV'] +
                   u"$\\gamma:\\/%.1f^\\circ$\n" % CTFInfo['DefocusAngle'] +
                   u"$R:\\/%.3f$\n" % CTFInfo['CtfFigureOfMerit'] +
                   u"$Fit\\/res:\\/%.1f\\/\\AA$" % CTFInfo['FinalResolution'])

        infobox = matplotlib.offsetbox.AnchoredText(results,
                                                    pad=0.5,
                                                    loc=1,
                                                    prop={'size': 13})
        self.axes.add_artist(infobox)

        self.axes.set_axis_off()  # This is still not cropping properly...

        return self.printAndReturn()

    def plotStats(self):
        """
        Write the statistics found in ``plotDict`` onto the figure as two text
        columns (label on the left, formatted value from mid-width).

        Entries are added only for the keys that are present: 'pixelsize',
        'voltage', 'C3', 'meanPeaksig' (together with 'stdPeaksig') and the
        fields of 'CTFInfo'.

        Returns whatever printAndReturn() returns.
        """
        # Setup unicode statistics dictionary
        statsDict = collections.OrderedDict()
        # The guard must name the key that is actually read ('pixelsize').
        if 'pixelsize' in self.plotDict:
            statsDict[
                u'Pixel size (nm):'] = "%.4f" % self.plotDict['pixelsize']
        if 'voltage' in self.plotDict:
            statsDict[u'Accelerating voltage (kV):'] = "%.1f" % self.plotDict[
                'voltage']
        if 'C3' in self.plotDict:
            statsDict[
                u'Spherical aberration, C3 (mm):'] = "%.1f" % self.plotDict[
                    'C3']

        if 'meanPeaksig' in self.plotDict:
            statsDict[u'Peak significance:'] = u"%.2f" % self.plotDict[
                'meanPeaksig'] + u" ± %.2f" % self.plotDict['stdPeaksig']

        try:
            CTFInfo = self.plotDict['CTFInfo']
            statsDict[u'CTF defocus #1 (Å):'] = "%.1f" % CTFInfo['DefocusU']
            statsDict[u'CTF defocus #2 (Å):'] = "%.1f" % CTFInfo['DefocusV']
            statsDict[u'CTF gamma (°):'] = "%.4f" % CTFInfo['DefocusAngle']
            statsDict[u'CTF correlation coefficient :'] = "%.5f" % CTFInfo[
                'CtfFigureOfMerit']
            statsDict[u'CTF maximum fit frequency (Å) :'] = "%.1f" % CTFInfo[
                'FinalResolution']
        except (KeyError, TypeError):
            # CTF results are optional: a missing or non-numeric field ends
            # the CTF section, keeping the entries gathered so far.
            pass

        # Print the statistical metrics
        self.fig.clear()
        self.axes.get_xaxis().set_visible(False)
        self.axes.get_yaxis().set_visible(False)
        fontsize = 12
        # Line pitch as a fraction of the figure height (1.5 x font size).
        fontfigspacing = float(
            fontsize * 1.5) / (self.fig.dpi * self.fig.get_size_inches()[1])
        for row, (key, value) in enumerate(statsDict.items(), start=1):
            ypos = 1 - row * fontfigspacing
            self.fig.text(fontfigspacing, ypos, key, size=fontsize)
            self.fig.text(0.5 + fontfigspacing, ypos, value, size=fontsize)

        return self.printAndReturn()