示例#1
0
def bed_meta2(bed_ls,
              bed_label_ls,
              icshape_out_ls,
              savefn,
              extend=20,
              species='human'):
    """Overlay one mean-profile line per BED file on a shared axis and save it.

    The three list arguments are walked in lockstep with ``zip``; each
    (bed, label, icshape_out) triple is handed to ``single_bed_meta`` and the
    returned ``region_mean`` is drawn as one line.

    Args:
        bed_ls: BED file paths, one per line to draw.
        bed_label_ls: Legend labels, parallel to ``bed_ls``.
        icshape_out_ls: Reactivity files, parallel to ``bed_ls``.
        savefn: Path the figure is saved to. It is also forwarded unchanged
            to ``single_bed_meta``.
        extend: Forwarded to ``single_bed_meta`` (presumably the flank size
            around each region -- confirm against that helper).
        species: Passed to ``util.read_fa`` to pick the reference sequences.
    """
    fa_dict = util.read_fa(fa=None, species=species, pureID=1)
    _, ax = plt.subplots(figsize=(16, 8))

    for track in zip(bed_ls, bed_label_ls, icshape_out_ls):
        bed, bed_label, icshape_out = track
        print(bed, bed_label, icshape_out)

        region_mean, entry_num, df = single_bed_meta(
            bed, icshape_out, extend, savefn, bed_label, fa_dict)
        print(region_mean)
        print(df.head())

        # The legend entry carries the number of entries behind the mean.
        legend_text = "%s(n=%s)" % (bed_label, str(entry_num))
        ax.plot(region_mean, label=legend_text, marker='.')

    # Fixed y-range so figures from different runs are directly comparable.
    ax.set_ylim(0, 0.6)
    plt.legend()
    plt.tight_layout()
    plt.savefig(savefn)
    plt.close()
示例#2
0
def _tx_null_fraction(tx_entry):
    """Return the fraction of a transcript's positions that have no value.

    A position counts as missing when its reactivity string is exactly
    'NULL', '-1.0' or '-1'. The count is divided by the entry's ``length``.
    """
    reactivity_ls = tx_entry['reactivity_ls']
    missing = sum(
        reactivity_ls.count(token) for token in ('NULL', '-1.0', '-1'))
    return missing / float(tx_entry['length'])


def plot_shape_tx_null_pct(out1=None,
                           out2=None,
                           out1_label='True',
                           out2_label='Predict',
                           savefn=None,
                           species='human'):
    """Scatter per-transcript missing-value fractions of two reactivity files.

    For every transcript present in both files, the fraction of missing
    positions is computed for each file and plotted as one point
    (x = ``out1``, y = ``out2``). Axis labels include the overall
    ``'total_bases(NULL_pct)'`` reported by ``util.shape_dict_stat``.

    Args:
        out1: Path of the first reactivity file (x axis).
        out2: Path of the second reactivity file (y axis).
        out1_label: Column / axis label for ``out1``.
        out2_label: Column / axis label for ``out2``.
        savefn: Path the scatter plot is saved to.
        species: Passed to ``util.read_fa`` to pick the reference sequences.

    Returns:
        tuple: ``(stat1, stat2)``, the ``util.shape_dict_stat`` results for
        ``out1`` and ``out2``.
    """
    out_dict1 = util.read_icshape_out(out1)
    out_dict2 = util.read_icshape_out(out2)
    tx_common = set(out_dict1.keys()) & set(out_dict2.keys())

    # Both lists are filled in the same iteration so rows stay paired.
    # The earlier code counted '-1' for the second file in out_dict1 by
    # mistake; each side is now computed from its own dictionary.
    null_pct1_ls = []
    null_pct2_ls = []
    for tx in tx_common:
        null_pct1_ls.append(_tx_null_fraction(out_dict1[tx]))
        null_pct2_ls.append(_tx_null_fraction(out_dict2[tx]))

    print('{}: n={}'.format(out1, len(out_dict1)))
    print('{}: n={}'.format(out2, len(out_dict2)))
    print('common tx: n={}'.format(len(tx_common)))

    fa_dict = util.read_fa(fa=None, species=species, pureID=1)
    stat1 = util.shape_dict_stat(out_dict1,
                                 fa_dict,
                                 None,
                                 RNA_type=None,
                                 trim5Len=5,
                                 trim3Len=30)
    stat2 = util.shape_dict_stat(out_dict2,
                                 fa_dict,
                                 None,
                                 RNA_type=None,
                                 trim5Len=5,
                                 trim3Len=30)
    print(pd.DataFrame.from_dict(stat1, orient='index'),
          pd.DataFrame.from_dict(stat2, orient='index'))

    df = pd.DataFrame.from_dict({
        out1_label: null_pct1_ls,
        out2_label: null_pct2_ls
    })
    print(df.head())

    fig, ax = plt.subplots(figsize=(6, 6))
    sns.scatterplot(x=out1_label, y=out2_label, data=df, ax=ax, s=10)
    plt.xlabel('{} (null_pct: {:.2f})'.format(out1_label,
                                              stat1['total_bases(NULL_pct)']))
    plt.ylabel('{} (null_pct: {:.2f})'.format(out2_label,
                                              stat2['total_bases(NULL_pct)']))
    plt.tight_layout()
    plt.savefig(savefn)
    plt.close()

    return stat1, stat2
def search(species='human', savefn=None):
    """Write every ``[AG][AG]AC[ATC]`` motif hit in the reference as a BED line.

    Each sequence returned by ``util.read_fa`` is scanned with
    ``re.finditer``, which reports non-overlapping matches only. For every
    hit one tab-separated line is written with six fields: sequence id,
    match start + 2, match start + 3 (i.e. the single base at the third
    motif position), ``"<running hit number>_<matched text>"``, ``'0'`` and
    ``'+'``. The hit number runs across all sequences.

    Args:
        species: Passed to ``util.read_fa`` to pick the reference sequences.
        savefn: Path of the output file; it is opened for writing.
    """
    fa_dict = util.read_fa(fa=None, species=species, pureID=1)
    motif = re.compile(r'[AG][AG]AC[ATC]')

    with open(savefn, 'w') as bed_out:
        hit_no = 0
        for tx_id, tx_seq in fa_dict.items():
            for hit in motif.finditer(tx_seq[0:]):
                hit_no += 1
                motif_start = hit.start()
                fields = [
                    tx_id,
                    str(motif_start + 2),
                    str(motif_start + 3),
                    str(hit_no) + '_' + hit.group(),
                    '0',
                    '+',
                ]
                bed_out.write('\t'.join(fields) + '\n')
def generate_mask_region_validate(shape_out,
                                  tx,
                                  species,
                                  mask_start,
                                  mask_end,
                                  fragment_start,
                                  fragment_end,
                                  savefn_dir,
                                  plot_gradient=1):
    """Write a one-line validation file for a fragment with a masked region.

    Reactivities of ``tx`` in ``[fragment_start, fragment_end)`` are written
    twice: once with positions in ``[mask_start, mask_end)`` replaced by -1
    (masked column) and once unchanged (true column). 'NULL' entries become
    '-1' in both columns. Optionally the external ``gradcam_SHAPEimpute.py``
    script is run on the file that was written.

    Args:
        shape_out: Path of the reactivity file read by
            ``util.read_icshape_out``.
        tx: Transcript id to look up in the reactivity and sequence data.
        species: Passed to ``util.read_fa`` to pick the reference sequences.
        mask_start: First masked position (inclusive).
        mask_end: End of the masked region (exclusive).
        fragment_start: First position of the fragment (inclusive).
        fragment_end: End of the fragment (exclusive).
        savefn_dir: Directory the output file is written into.
        plot_gradient: When truthy, run the gradient plotting script.

    Returns:
        str: Path of the validation file that was written.
    """
    # Local imports keep this block self-contained.
    import os
    import shlex

    out_dict = util.read_icshape_out(out=shape_out, pureID=1)
    fa_dict = util.read_fa(fa=None, species=species, pureID=1)

    shape_true_ls = []
    shape_mask_ls = []
    for pos in range(fragment_start, fragment_end):
        value = out_dict[tx]['reactivity_ls'][pos]
        shape_true_ls.append(value)
        shape_mask_ls.append(-1 if mask_start <= pos < mask_end else value)

    # 'NULL' is encoded as '-1' in the validation format.
    shape_true_str = ','.join(
        str('-1' if v == 'NULL' else v) for v in shape_true_ls)
    shape_mask_str = ','.join(
        str('-1' if v == 'NULL' else v) for v in shape_mask_ls)
    seq = fa_dict[tx][fragment_start:fragment_end]

    savefn = '{}/{}.F{}-{}.M{}-{}.txt'.format(savefn_dir, tx, fragment_start,
                                              fragment_end, mask_start,
                                              mask_end)
    with open(savefn, 'w') as SAVEFN:
        # NOTE(review): the second column is hard-coded to '1869'
        # (presumably a transcript length) -- confirm it is intended for
        # every transcript.
        SAVEFN.write('\t'.join(
            map(str, [
                tx, '1869', fragment_start, fragment_end, '.', '.', seq,
                shape_mask_str, shape_true_str
            ])) + '\n')

    if plot_gradient:
        plot_savefn = savefn.replace('.txt', '.gradient.pdf')
        # The command first changes into the scripts directory, so relative
        # paths must be made absolute to still point at the file written
        # above. Quoting keeps paths with spaces or shell metacharacters
        # intact.
        cmd = (
            "cd /home/gongjing/project/shape_imputation/ShapeImputation/scripts; python gradcam_SHAPEimpute.py --filename_validation {} --plot_savefn {}"
            .format(shlex.quote(os.path.abspath(savefn)),
                    shlex.quote(os.path.abspath(plot_savefn))))
        subprocess.call([cmd], shell=True)

    return savefn
def generate_windows(out=None,
                     window_len_ls=None,
                     sliding_ls=None,
                     species=None,
                     all_valid_reactivity=0,
                     null_pct_max=0.9,
                     split_train_validate=0,
                     generate_random_null_and_ratio=0):
    """Cut a reactivity file into fixed-length windows, one file per length.

    Output goes to ``<out>.windowsHasNull/windowLen<L>.sliding<S>.txt``. The
    sliding step is taken from ``range(window_len, window_len + 1, 10)``,
    which yields only ``window_len`` itself, so windows do not overlap.

    Args:
        out: Reactivity file to fragment; a built-in default path is used
            when None.
        window_len_ls: Window lengths to generate; defaults to ``[50, 100]``.
        sliding_ls: Accepted but not used by this function.
        species: Passed to ``util.read_fa``; defaults to ``'human'``.
        all_valid_reactivity: Forwarded to ``shape_fragmentation``.
        null_pct_max: Forwarded to ``shape_fragmentation``.
        split_train_validate: When truthy, each window file is split with
            ``util.fragment_split`` (train fraction 0.7) after seeding
            NumPy's global RNG with 1234.
        generate_random_null_and_ratio: When truthy (and splitting is on),
            used as ``null_pct`` for ``data_random_null_filterNULL`` on both
            the train and the validate file.
    """
    if out is None:
        out = '/home/gongjing/project/shape_imputation/data/DMSseq_fibroblast_vivo/3.shape/shape.c200T2M0m0.out'
    if window_len_ls is None:
        window_len_ls = [50, 100]
    if species is None:
        species = 'human'

    fa_dict = util.read_fa(species=species)
    save_dir = out + '.windowsHasNull'
    util.check_dir_or_make(save_dir)

    # Flatten the (window length, sliding step) grid into one stream.
    window_grid = ((length, step)
                   for length in window_len_ls
                   for step in range(length, length + 1, 10))

    for window_len, sliding in window_grid:
        savefn = '%s/windowLen%s.sliding%s.txt' % (save_dir, window_len,
                                                   sliding)
        shape_fragmentation(out=out,
                            fa_dict=fa_dict,
                            savefn=savefn,
                            window_len=window_len,
                            sliding=sliding,
                            all_valid_reactivity=all_valid_reactivity,
                            null_pct_max=null_pct_max)
        if not split_train_validate:
            continue

        # Re-seed before every split so each file is split reproducibly.
        np.random.seed(1234)
        csv_train, csv_validate = util.fragment_split(fragment=savefn,
                                                      train_frac=0.7,
                                                      cols=8)
        if not generate_random_null_and_ratio:
            continue

        for csv_fn in (csv_train, csv_validate):
            data_random_null_filterNULL(
                csv_fn,
                null_pct=generate_random_null_and_ratio,
                col=9,
                savefn=None,
                seed=1234)
示例#6
0
def generate_dbn_react(shape_out=None,
                       species=None,
                       dots=None,
                       savefn_prefix=None,
                       min_len=0):
    """Write paired ``.dbn`` and ``.react`` files for transcripts with all data.

    A transcript is written only if it has a sequence, a dot-bracket
    structure and reactivities, the sequence is at least ``min_len`` long,
    and all three have the same length.

    ``<savefn_prefix>.dbn`` gets, per transcript, ``>id``, the sequence, the
    dot-bracket string and a blank line. ``<savefn_prefix>.react`` gets
    ``>id``, one ``<1-based position>\\t<value>`` line per position with
    'NULL' replaced by 'NA', and a blank line.

    Args:
        shape_out: Reactivity file read by ``util.read_icshape_out``.
        species: Passed to ``util.read_fa`` to pick the reference sequences.
        dots: Structure file read by ``util.read_dots``.
        savefn_prefix: Common prefix of the two output files.
        min_len: Minimum sequence length for a transcript to be written.
    """
    dbn = savefn_prefix + '.dbn'
    react = savefn_prefix + '.react'

    shape_dict = util.read_icshape_out(shape_out, pureID=0)
    fa_dict = util.read_fa(fa=None, species=species, pureID=0)
    dot_dict = util.read_dots(dot=dots)

    # Context managers close both outputs even if a record raises midway.
    with open(dbn, 'w') as DBN, open(react, 'w') as REACT:
        for tx_id in shape_dict:
            if tx_id not in fa_dict:
                continue
            seq = fa_dict[tx_id][0:]
            if tx_id not in dot_dict:
                continue
            dot = dot_dict[tx_id]['dotbracket']
            reactivity_ls = shape_dict[tx_id]['reactivity_ls']
            print(tx_id, seq, dot, reactivity_ls)

            if len(seq) < min_len:
                continue
            # Sequence, structure and reactivities must align position-wise.
            if len({len(seq), len(dot), len(reactivity_ls)}) != 1:
                continue

            DBN.write('>' + tx_id + '\n')
            DBN.write(seq + '\n')
            DBN.write(dot + '\n')
            DBN.write('\n')

            REACT.write('>' + tx_id + '\n')
            for pos, value in enumerate(reactivity_ls, 1):
                REACT.write(
                    str(pos) + '\t' + value.replace('NULL', 'NA') + '\n')
            REACT.write('\n')
示例#7
0
def _codon_window_meta(out_dict, trans_dict, fa_dict, savefn, tag, flank_len,
                       left_key, right_key, max_null_pct, extend):
    """Extract, sort and plot reactivity windows around one codon boundary.

    Args:
        out_dict: Transcript id -> entry with a ``'reactivity_ls'`` list.
        trans_dict: Transcript id -> annotation with the coordinate keys.
        fa_dict: Transcript id -> sequence.
        savefn: Base output name; ``'.shape'`` is replaced by
            ``'.<tag>.ok.shape'`` / ``'.<tag>.null.shape'``.
        tag: ``'start_codon'`` or ``'stop_codon'``; used in file names.
        flank_len: Callable mapping an annotation record to the length of
            the UTR next to the boundary; compared against ``extend``.
        left_key: Annotation key of the last position left of the boundary.
        right_key: Annotation key of the first position right of it.
        max_null_pct: Windows whose NULL fraction is at or above this go to
            the 'null' file, the others to the 'ok' file.
        extend: Number of positions taken on each side of the boundary.
    """
    savefn_ok = savefn.replace('.shape', '.{}.ok.shape'.format(tag))
    savefn_null = savefn.replace('.shape', '.{}.null.shape'.format(tag))

    tx_with_shape = []
    # Context managers close both outputs even if a record raises midway.
    with open(savefn_ok, 'w') as ok_out, open(savefn_null, 'w') as null_out:
        for tx_id, tx_shape in out_dict.items():
            if tx_id not in trans_dict:
                continue
            tx_info = trans_dict[tx_id]
            # Both flanks must be able to supply `extend` positions.
            if flank_len(tx_info) < extend:
                continue
            if int(tx_info['cds_end']) - int(tx_info['cds_start']) < extend:
                continue
            tx_with_shape.append(tx_id)

            # 0-based, end-exclusive slice with `extend` positions on each
            # side (assumes annotation coordinates are 1-based -- confirm).
            start = int(tx_info[left_key]) - extend
            end = int(tx_info[right_key]) + extend - 1
            window = tx_shape['reactivity_ls'][start:end]
            if len(window) == 0:
                # Nothing to score; also avoids dividing by zero below.
                continue
            null_pct = window.count('NULL') / float(len(window))
            if null_pct == 1:
                continue

            seq = fa_dict[tx_id][start:end]
            shape_str = ','.join(window).replace('NULL', '-1')
            line = '\t'.join(
                map(str, [
                    tx_id,
                    len(fa_dict[tx_id][0:]), start, end, '.', '.', seq,
                    shape_str, shape_str
                ])) + '\n'
            if null_pct >= max_null_pct:
                null_out.write(line)
            else:
                ok_out.write(line)

    print('tx_with_shape: {}'.format(len(tx_with_shape)))

    savefn_ok_sort, _ = util.sort_two_shape(shape1=savefn_ok,
                                            value_col1=7,
                                            shape2=savefn_ok)
    savefn_null_sort, _ = util.sort_two_shape(shape1=savefn_null,
                                              value_col1=7,
                                              shape2=savefn_null)

    df_ok = util.plot_heatmap(fn=savefn_ok_sort,
                              savefn=savefn_ok_sort + '.heatmap.pdf',
                              value_col=7,
                              fig_size_x=10,
                              fig_size_y=20,
                              cmap='summer',
                              facecolor='black')
    df_null = util.plot_heatmap(fn=savefn_null_sort,
                                savefn=savefn_null_sort + '.heatmap.pdf',
                                value_col=7,
                                fig_size_x=10,
                                fig_size_y=20,
                                cmap='summer',
                                facecolor='black')
    ok_mean = list(df_ok.mean())
    null_mean = list(df_null.mean())

    fig, ax = plt.subplots(figsize=(16, 8))
    ax.plot(null_mean,
            label="%s(n=%s)" % ('null', df_null.shape[0]),
            marker='.')
    ax.plot(ok_mean, label="%s(n=%s)" % ('ok', df_ok.shape[0]), marker='.')
    ax.set_ylim(0, 0.6)
    plt.legend()
    plt.tight_layout()
    plt.savefig(savefn_ok_sort + '.meta.pdf')
    plt.close()


def extract_start_codon_shape(species, shape, savefn, max_null_pct=0.4, extend=50):
    """Extract and plot reactivity windows around start and stop codons.

    For each transcript that has reactivities and an annotation, a window of
    ``extend`` positions on each side of the 5'UTR/CDS boundary (start
    codon) and of the CDS/3'UTR boundary (stop codon) is cut out.
    Transcripts whose adjacent UTR or CDS is shorter than ``extend`` are
    skipped, as are windows that are entirely 'NULL'. The remaining windows
    are split by NULL fraction into an 'ok' and a 'null' file, then sorted
    and plotted (a heatmap per file plus one mean-profile figure per codon).

    With the default ``extend=50`` the windows are the same 100 positions
    as before; the flank used to be fixed at 50 regardless of ``extend``.

    Args:
        species: Passed to the ``util`` loaders to pick annotation and
            sequences.
        shape: Path of the reactivity file.
        savefn: Base output name; ``'.shape'`` in it is replaced to derive
            the per-codon output file names.
        max_null_pct: NULL-fraction threshold between the two output files.
        extend: Flank size on each side of the codon boundary.
    """
    trans_dict = util.loadTransGtfBed2(species=species)
    out_dict = util.read_icshape_out(out=shape, pureID=1)
    fa_dict = util.read_fa(fa=None, species=species, pureID=1)

    # start codon
    _codon_window_meta(out_dict,
                       trans_dict,
                       fa_dict,
                       savefn,
                       tag='start_codon',
                       flank_len=lambda tx: int(tx['utr_5_end']),
                       left_key='utr_5_end',
                       right_key='cds_start',
                       max_null_pct=max_null_pct,
                       extend=extend)

    # stop codon
    _codon_window_meta(
        out_dict,
        trans_dict,
        fa_dict,
        savefn,
        tag='stop_codon',
        flank_len=lambda tx: int(tx['utr_3_end']) - int(tx['utr_3_start']),
        left_key='cds_end',
        right_key='utr_3_start',
        max_null_pct=max_null_pct,
        extend=extend)
示例#8
0
def _fragment_to_validation(fragment, fragment2):
    """Copy a fragment file, duplicating its last column and masking NULLs.

    Shell pipeline: awk appends the last field again as an extra
    tab-separated column, then sed replaces every 'NULL' with '-1' in place
    in the copy.
    """
    cmd = '''awk '{print $0"\\t"$NF}' ''' + " {} > {}; sed -i 's/NULL/-1/g' {}".format(
        fragment, fragment2, fragment2)
    subprocess.call([cmd], shell=True)


def complete_shape_out(icshape=None,
                       species='human',
                       predict_label=None,
                       predict_model=None,
                       pct=0.5,
                       window_len=100,
                       sliding=50,
                       output_dir=None,
                       gpu_id=1):
    """Impute a reactivity file and merge predictions with untouched windows.

    Steps, all written under ``output_dir``:
      1. Fragment ``icshape`` twice: all windows (``null_pct_max=2``) and
         only windows with ``null_pct_max=pct``.
      2. Run ``predict_new_icshape.sh`` on the ``pct`` windows.
      3. For the remaining windows, use their own last column as a pseudo
         prediction.
      4. Concatenate both groups and convert back to a ``.out`` file.
      5. Plot true vs. predicted per-transcript NULL fractions.

    Args:
        icshape: Path of the input reactivity file.
        species: Passed to ``util.read_fa`` and to the scatter plot.
        predict_label: Accepted but not used by this function.
        predict_model: Forwarded as the last argument of the predict script.
        pct: ``null_pct_max`` for the windows that are sent to the model.
        window_len: Fragment length.
        sliding: Fragment step.
        output_dir: Directory for all intermediate and final files; created
            (including parents) if missing.
        gpu_id: Forwarded as the first argument of the predict script.

    Returns:
        tuple: ``(merged_out_path, stat1, stat2)`` where the stats come from
        ``plot_shape_tx_null_pct``.
    """
    # makedirs also creates missing parents and tolerates an existing dir.
    os.makedirs(output_dir, exist_ok=True)
    fa_dict = util.read_fa(fa=None, species=species, pureID=1)

    icshape_fragment_all = output_dir + '/' + 'allfragment.txt'
    icshape_fragment_all2 = icshape_fragment_all + '2'
    util.shape_fragmentation(out=icshape,
                             fa_dict=fa_dict,
                             savefn=icshape_fragment_all,
                             window_len=window_len,
                             sliding=sliding,
                             all_valid_reactivity=0,
                             null_pct_max=2)
    _fragment_to_validation(icshape_fragment_all, icshape_fragment_all2)

    icshape_fragment_pct = output_dir + '/' + 'allfragment.{}.txt'.format(pct)
    util.shape_fragmentation(out=icshape,
                             fa_dict=fa_dict,
                             savefn=icshape_fragment_pct,
                             window_len=window_len,
                             sliding=sliding,
                             all_valid_reactivity=0,
                             null_pct_max=pct)
    icshape_fragment_pct2 = icshape_fragment_pct + '2'
    _fragment_to_validation(icshape_fragment_pct, icshape_fragment_pct2)

    predict = output_dir + '/' + 'predict.txt'
    cmd_predict = 'bash predict_new_icshape.sh {} {} {} {}'.format(
        gpu_id, icshape_fragment_pct2, predict, predict_model)
    subprocess.call([cmd_predict], shell=True)

    predict_shape_out = predict.replace('.txt', '.out')
    util.predict_to_shape(validation=icshape_fragment_pct2,
                          predict=predict,
                          shape_out=predict_shape_out)

    # Subtract the windows sent to the model from the full set, leaving the
    # windows that are not predicted. The awk key uses the comma (SUBSEP)
    # form so that fields cannot run together and collide, as they could
    # with plain concatenation ($1$3$4).
    icshape_fragment_exceed_pct2 = output_dir + '/' + 'allfragment.exceed{}.txt2'.format(
        pct)
    cmd = ''' awk 'NR==FNR{a[$1,$3,$4];next} !(($1,$3,$4) in a){print $0}' ''' + '''{} {} > {}'''.format(
        icshape_fragment_pct2, icshape_fragment_all2,
        icshape_fragment_exceed_pct2)
    subprocess.call([cmd], shell=True)

    # For the non-predicted windows build a pseudo prediction file that
    # simply reuses the original values (the last column).
    icshape_fragment_exceed_pct2_predict = icshape_fragment_exceed_pct2 + '.predict'
    cmd = ''' awk '{print $NF}' ''' + ''' {} > {} '''.format(
        icshape_fragment_exceed_pct2, icshape_fragment_exceed_pct2_predict)
    subprocess.call([cmd], shell=True)

    # Merge predicted and non-predicted windows (acts as the validation
    # file).
    icshape_fragment_pct_plus_exceed_pct2 = output_dir + '/' + 'allfragment.{}+exceed{}.txt2'.format(
        pct, pct)
    cmd = ''' cat {} {} > {}'''.format(icshape_fragment_pct2,
                                       icshape_fragment_exceed_pct2,
                                       icshape_fragment_pct_plus_exceed_pct2)
    subprocess.call([cmd], shell=True)

    # Merge the prediction files in the same order as the validation files.
    icshape_fragment_pct_plus_exceed_predict = output_dir + '/' + 'allfragment.{}+exceed{}.txt2.predict'.format(
        pct, pct)
    cmd = ''' cat {} {} > {} '''.format(
        predict, icshape_fragment_exceed_pct2_predict,
        icshape_fragment_pct_plus_exceed_predict)
    subprocess.call([cmd], shell=True)

    # Build the final .out file from the merged validation and prediction.
    icshape_fragment_pct_plus_exceed_predict_shapeout = icshape_fragment_pct_plus_exceed_predict + '.out'
    util.predict_to_shape(
        validation=icshape_fragment_pct_plus_exceed_pct2,
        predict=icshape_fragment_pct_plus_exceed_predict,
        shape_out=icshape_fragment_pct_plus_exceed_predict_shapeout)

    # Scatter plot of true vs. predicted per-transcript NULL fraction.
    savefn = icshape_fragment_pct_plus_exceed_predict_shapeout + '.scatter.pdf'
    stat1, stat2 = plot_two_shape_common_tx_pct.plot_shape_tx_null_pct(
        out1=icshape,
        out2=icshape_fragment_pct_plus_exceed_predict_shapeout,
        out1_label='True',
        out2_label='Predict',
        savefn=savefn,
        species=species)

    return icshape_fragment_pct_plus_exceed_predict_shapeout, stat1, stat2
def get_stat(coverage_ls=None, RT_ls=None, prefix=None):
    """Tabulate reactivity-file statistics over a coverage x RT cutoff grid.

    For every (coverage, RT) pair the file
    ``<prefix>/shape.c<coverage>T<RT>M0m0.out`` is read and summarised with
    ``util.shape_dict_stat``. Transcript count, total bases and the NULL
    percentages (overall and per base A/T/C/G) are collected into tables
    indexed by coverage (rows) and RT (columns), three of which are printed,
    and each table is drawn with ``util.heatmap`` into ``prefix``.

    Args:
        coverage_ls: Coverage cutoffs; defaults to 0..250 in steps of 50.
        RT_ls: RT cutoffs; defaults to ``[0, 1, 2, 3]``.
        prefix: Directory holding the input files and receiving the PDFs;
            a built-in default path is used when None.
    """
    if coverage_ls is None:
        coverage_ls = [0, 50, 100, 150, 200, 250]
    if RT_ls is None:
        RT_ls = [0, 1, 2, 3]
    if prefix is None:
        prefix = '/Share2/home/zhangqf5/gongjing/RNA-structure-profile-imputation/data/hek_wc_vivo/3.shape'

    fa_dict = util.read_fa(species='human')
    trans_dict = util.loadTransGtfBed2(species='human')

    # Statistics copied verbatim from util.shape_dict_stat's result.
    copied_keys = (
        'total_bases',
        'total_bases(NULL_pct)',
        'A(NULL_pct)',
        'T(NULL_pct)',
        'C(NULL_pct)',
        'G(NULL_pct)',
    )

    stat_dict = nested_dict(3, int)
    for coverage in coverage_ls:
        for RT in RT_ls:
            shape_out = '%s/shape.c%sT%sM0m0.out' % (prefix, coverage, RT)
            out_dict = util.read_icshape_out(shape_out)
            out_dict_stat = util.shape_dict_stat(shape_dict=out_dict,
                                                 fa_dict=fa_dict,
                                                 trans_dict=trans_dict,
                                                 RNA_type='all')
            print(out_dict_stat)

            stat_dict['tx_count'][coverage][RT] = len(out_dict)
            for key in copied_keys:
                stat_dict[key][coverage][RT] = out_dict_stat[key]

    for key in ('tx_count', 'total_bases', 'total_bases(NULL_pct)'):
        print(pd.DataFrame.from_dict(stat_dict[key], orient='index'))

    # (statistic, output file suffix, annotation format) per heatmap.
    heatmap_specs = (
        ('tx_count', '/shape.tx_num.pdf', 'd'),
        ('total_bases', '/shape.total_bases.pdf', '.2g'),
        ('total_bases(NULL_pct)', '/shape.total_null_pct.pdf', '.2g'),
        ('A(NULL_pct)', '/shape.A_null_pct.pdf', '.2g'),
        ('T(NULL_pct)', '/shape.T_null_pct.pdf', '.2g'),
        ('C(NULL_pct)', '/shape.C_null_pct.pdf', '.2g'),
        ('G(NULL_pct)', '/shape.G_null_pct.pdf', '.2g'),
    )
    for key, pdf_suffix, fmt in heatmap_specs:
        util.heatmap(pd.DataFrame.from_dict(stat_dict[key], orient='index'),
                     xlabel='Cutoff (average RT stop count)',
                     ylabel='Cutoff (base density)',
                     savefn=prefix + pdf_suffix,
                     fmt=fmt)