Exemplo n.º 1
0
    # Load the table at `filename` through the grapher; every step below
    # operates on the object it returns.
    data = grapher.import_file(filename)

    # Sequencing-run bookkeeping. None of these names are used again in the
    # visible part of this script.
    run_ids = set_up_sequencing_run_ids()
    dmso, kla, kla_dex, all_dmso, all_kla, all_kla_dex = get_sequencing_run_id_sets(
    )
    total_tags = total_tags_per_run()

    # Norm sum scalars, one per replicate set, listed in the order:
    # all, group 1, group 2, group 3, group 4.
    # The list index is what gets passed to get_rep_string() below.
    kla_scalars = [1.223906, 1.281572, 1.118363, 1.104860, 1.503260]
    kla_dex_scalars = [1.182574, 1.147695, 1.248636, 1.069588, 1.388871]
    dex_over_kla_scalars = [1.069073, 0.967659, 1.122628, 1.008758, 0.927466]

    # Normalize each KLA tag-count column with its scalar.
    for i, scalar in enumerate(kla_scalars):
        data = grapher.normalize(data,
                                 'kla_{0}tag_count'.format(get_rep_string(i)),
                                 scalar)
    # Normalize each KLA+Dex tag-count column with its scalar.
    for i, scalar in enumerate(kla_dex_scalars):
        data = grapher.normalize(
            data, 'kla_dex_{0}tag_count'.format(get_rep_string(i)), scalar)
    # Second pass over the same KLA+Dex columns with the Dex-over-KLA scalars.
    # The explicit suffix presumably keeps this result separate from the
    # first pass -- TODO confirm against SeqGrapher.normalize.
    for i, scalar in enumerate(dex_over_kla_scalars):
        data = grapher.normalize(data,
                                 'kla_dex_{0}tag_count'.format(
                                     get_rep_string(i)),
                                 scalar,
                                 suffix='_norm_2')

    # Keep rows with a nonzero has_refseq value and a transcript_score of at
    # least 10.
    refseq = data[data['has_refseq'] != 0]
    refseq = refseq[refseq['transcript_score'] >= 10]

    # Path for the 'scatterplots' output, built from dirpath by the grapher.
    scatter_dirpath = grapher.get_filename(dirpath, 'scatterplots')
Exemplo n.º 2
0
Created on Mar 23, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
import os

if __name__ == '__main__':
    # Script entry point: load the BALBc/NOD vector table, apply two rounds
    # of tag-count normalization, then pull out the RefSeq subset.
    grapher = SeqGrapher()

    dirpath = '/Users/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis/'
    filename = os.path.join(dirpath, 'balbc_nod_vectors.txt')
    data = grapher.import_file(filename)

    # Round 1: each NOD sample scaled against its balbc counterpart.
    vs_balbc = (
        ('nod_notx_0h_tag_count', 2.790489),
        ('diabetic_nod_notx_0h_tag_count', 1.083990),
        ('slow_diabetic_nod_notx_0h_tag_count', 0.349747),
    )
    for column, scalar in vs_balbc:
        data = grapher.normalize(data, column, scalar)

    # Round 2: diabetic samples scaled against nod notx. An explicit suffix
    # is passed for this second normalization of the same columns.
    vs_nod_notx = (
        ('diabetic_nod_notx_0h_tag_count', 0.483232),
        ('slow_diabetic_nod_notx_0h_tag_count', 0.276080),
    )
    for column, scalar in vs_nod_notx:
        data = grapher.normalize(data, column, scalar, suffix='_norm_2')

    refseq = grapher.get_refseq(data)
Exemplo n.º 3
0
    yzer = SeqGrapher()
    # Resolve the project-relative analysis directory through the grapher.
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells_Finland_2012/Analysis_2013_02'
    dirpath = yzer.get_path(dirpath)
    # Image output location under dirpath; judging by the helper's name the
    # directory is created if missing -- TODO confirm.
    img_dirpath = yzer.get_and_create_path(dirpath, 'with_me3',
                                           'basic_scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    # Replace missing values with 0 so the sum below is defined for every row.
    data = data.fillna(0)
    # Keep only rows whose naive + activated me3 tag counts sum to more than 0.
    data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0]
    for key1, key2, norm_factor in comparison_sets:
        name1 = pretty_names[key1[:-1]] + key1[-1:]
        name2 = pretty_names[key2[:-1]] + key2[-1:]

        data_normed = yzer.normalize(data, key2 + '_tag_count', norm_factor)
        ax = yzer.scatterplot(
            data_normed,
            key1 + '_tag_count',
            key2 + '_tag_count_norm',
            log=True,
            color='blue',
            title='{0} versus {1} Normalized Tag Counts'.format(name1, name2),
            xlabel='{0} tags in RefSeq transcripts'.format(name1),
            ylabel='{0} tags in RefSeq transcripts, normalized'.format(name2),
            add_noise=False,
            show_2x_range=True,
            show_legend=False,
            plot_regression=False,
            show_count=True,
            show_correlation=True,
Exemplo n.º 4
0
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    # Script entry point: load transcript vectors for the NOD/BALBc ThioMac
    # analysis, normalize the NOD samples, and keep expressed transcripts.
    yzer = SeqGrapher()
    yzer.fig_size = 15
    yzer.disable_show_plot = True

    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/')
    img_dirpath = yzer.get_and_create_path(dirpath, 'refseq_expression')

    vectors_path = yzer.get_filename(dirpath, 'transcript_vectors.txt')
    data = yzer.import_file(vectors_path)
    data = data.fillna(0)

    # Scale the NOD samples; the normalized values are read back below from
    # the matching '*_tag_count_norm' columns.
    for column, scalar in (('nod_notx_1h_tag_count', 1.095436),
                           ('nod_kla_1h_tag_count', 0.652898)):
        data = yzer.normalize(data, column, scalar)
    #data = yzer.normalize(data, 'nonplated_diabetic_nod_notx_tag_count', 0.885427)
    #data = yzer.normalize(data, 'nonplated_diabetic_balb_notx_tag_count', 0.645579)

    # Per-base read density for the BALBc samples, computed from the raw
    # counts before nonzero() is applied to them.
    for density_col, count_col in (
            ('balb_notx_1h_reads_per_base', 'balb_notx_1h_tag_count'),
            ('balb_kla_1h_reads_per_base', 'balb_kla_1h_tag_count')):
        data[density_col] = data[count_col] / data['length']

    # The four columns being compared: pass each through nonzero(), then
    # use the same set for the expression threshold.
    compared_cols = ['balb_notx_1h_tag_count', 'nod_notx_1h_tag_count_norm',
                     'balb_kla_1h_tag_count', 'nod_kla_1h_tag_count_norm']
    for col in compared_cols:
        data[col] = nonzero(data[col])

    # Keep transcripts scoring at least 4 whose largest count across the
    # four compared columns is at least 10.
    data = data[data['transcript_score'] >= 4]
    data = data[data[compared_cols].max(axis=1) >= 10]
Exemplo n.º 5
0
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
import os
from scipy.stats.stats import ttest_ind

if __name__ == '__main__':
    # Script entry point: load the PU.1 peak comparison table, add
    # normalized tag-count columns, zero out low counts, and start computing
    # per-strain ratios.
    grapher = SeqGrapher()

    dirpath = '/Volumes/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Inbred strains/Peak comparisons/Compared with NOD/'
    filename = os.path.join(dirpath, 'bl6_gt_balb_with_nod_pu_1_unique.txt')
    data = grapher.import_file(filename)

    data = data.fillna(0)  # For easy log graphing

    wt_peaks, balb_peaks, nod_peaks, balb2_peaks = 67074, 79353, 107716, 94199

    # Each sample is normalized with a scalar of 1. The peak-count ratio in
    # the trailing comment is the alternative scalar that was switched off.
    data = grapher.normalize(data, 'balb_pu_1_tag_count',
                             1)  # balb_peaks/wt_peaks
    data = grapher.normalize(data, 'nod_pu_1_tag_count',
                             1)  # nod_peaks/wt_peaks
    data = grapher.normalize(data, 'balb2_pu_1_tag_count',
                             1)  # balb2_peaks/wt_peaks

    # Zero out counts below 20. A boolean-mask .loc assignment writes
    # directly into `data`; the previous per-row chained assignment
    # (data[col][i] = 0 inside iterrows) was slow and may write to a
    # temporary copy rather than to `data`.
    data.loc[data['wt_pu_1_tag_count'] < 20, 'wt_pu_1_tag_count'] = 0
    data.loc[data['balb_pu_1_tag_count_norm'] < 20,
             'balb_pu_1_tag_count_norm'] = 0
    data.loc[data['nod_pu_1_tag_count_norm'] < 20,
             'nod_pu_1_tag_count_norm'] = 0

    # NOTE(review): the denominator is the raw 'balb_pu_1_tag_count' column,
    # not the thresholded 'balb_pu_1_tag_count_norm' -- confirm this is
    # intended.
    data[
        'wt_to_balb'] = data['wt_pu_1_tag_count'] / data['balb_pu_1_tag_count']
    data['nod_to_balb'] = data['nod_pu_1_tag_count'] / data[
if __name__ == '__main__':
    # Script entry point: load GRO-seq data joined with H3K4me2 peaks,
    # normalize tag counts, split rows by NOD similarity, and keep regions
    # where the wt peak count is more than twice the balb peak count.
    grapher = SeqGrapher()

    dirpath = grapher.get_path(
        '/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Inbred strains/Groseq comparisons/'
    )

    filename = os.path.join(dirpath, 'groseq_with_h3k4me2.txt')
    data = grapher.import_file(filename)

    data = data.fillna(0)

    nod_norm, balb_norm = 0.393529, 0.359488
    nod_to_balb_norm = 1.102844

    # NOTE(review): the scalar names look crossed -- balb counts are scaled
    # by nod_norm and nod counts by balb_norm. Left exactly as in the
    # original; confirm which pairing is intended.
    data = grapher.normalize(data, 'balb_tag_count', nod_norm)
    data = grapher.normalize(data, 'nod_tag_count', balb_norm)

    # Call form prints the same value on Python 2 and is valid on Python 3.
    print(len(data))

    # Boolean flag: True where nod_sv_id <= 0.1. Rows whose nod_sv_id was
    # missing were filled with 0 above and therefore get True.
    data['nod_with_bl6'] = data['nod_sv_id'] <= .1

    # Split on the flag directly instead of comparing to True / False.
    nod_with_bl6 = data[data['nod_with_bl6']]
    nod_with_balb = data[~data['nod_with_bl6']]

    nod_with_bl6 = grapher.collapse_strands(nod_with_bl6)
    nod_with_balb = grapher.collapse_strands(nod_with_balb)

    # Keep rows where the wt peak count exceeds twice the balb peak count.
    nod_with_bl6 = nod_with_bl6[nod_with_bl6['wt_peak_tag_count'] > 2 *
                                nod_with_bl6['balb_peak_tag_count']]
    nod_with_balb = nod_with_balb[nod_with_balb['wt_peak_tag_count'] > 2 *
                                  nod_with_balb['balb_peak_tag_count']]