Пример #1
0
def single_validation(fraction=None, test_index=None):
    '''Run a single validation experiment in which fraction% of the test genotypes are deleted.

    fraction and test_index are forwarded unchanged to im.v.Experiment, which is built on the
    'hutt.npz' problem and run with the main phaser (timing printout enabled). The experiment
    statistics are then plotted and printed, rows ordered by argsort of
    -stats[:, 2] / stats[:, 1], i.e. largest column-2-to-column-1 ratio first.

    Returns the validation Experiment object after it has been run.'''
    problem = im.hutt('hutt.npz')
    experiment = im.v.Experiment(problem, fraction=fraction, test_index=test_index)
    experiment.run(im.phase_main.main_phaser(print_times=True))

    # The plotting call returns a pair; only its second element (the stats array) is used here.
    _, stats = im.plots.plot_experiment_stats(experiment)
    sort_key = -stats[:, 2] / stats[:, 1]
    row_order = np.argsort(sort_key)
    print(stats[row_order, :])
    return experiment
Пример #2
0
def single_validation(fraction=None, test_index=None):
    '''Run a single validation experiment in which fraction% of the test genotypes are deleted.

    fraction and test_index are forwarded unchanged to im.v.Experiment, which is built on the
    'hutt.npz' problem and run with the main phaser (timing printout enabled). The experiment
    statistics are then plotted and printed, rows ordered by argsort of
    -stats[:, 2] / stats[:, 1], i.e. largest column-2-to-column-1 ratio first.

    Returns the validation Experiment object after it has been run.'''
    problem = im.hutt('hutt.npz')
    experiment = im.v.Experiment(problem, fraction=fraction, test_index=test_index)
    experiment.run(im.phase_main.main_phaser(print_times=True))

    # The plotting call returns a pair; only its second element (the stats array) is used here.
    _, stats = im.plots.plot_experiment_stats(experiment)
    sort_key = -stats[:, 2] / stats[:, 1]
    row_order = np.argsort(sort_key)
    print(stats[row_order, :])
    return experiment
Пример #3
0
def plot_two_families():
    '''Test ancestor imputation and child POO alignment for two families on chromosome 22.

    Loads the 'hutt.phased.npz' problem, prints the distribution of quasi-founder (QF)
    family sizes and runs analyze_family() on three sib sets:
      1. the QF sibship containing sample 1049, with 1049 itself removed;
      2. the same sibship including 1049;
      3. the children of the family returned by p.find_family(10, 1414).
    Figures are created, shown and saved only when the local plot / save_plot flags are
    set (both are off as committed). Summary statistics of the first analysis are printed
    at the end. Returns nothing.'''
    # Parameters
    chrom = 22
    plot = False  # True
    save_plot = False  # True
    debug = False  # True

    # Read data
    p = im.hutt('hutt.phased.npz')
    # aligned = set(p.haplotype.aligned_samples)
    # One frozenset of genotyped children per quasi-founder's family; the outer frozenset
    # collapses quasi-founders that map to the same set of children.
    qf_sibships = frozenset(
        frozenset(im.gt.genotyped_children(p, p.pedigree.find_family_by_child(founder, genotyped=False)))
        for founder in p.pedigree.quasi_founders)
    sibship_sizes = [len(sibship) for sibship in qf_sibships]
    print('%s %s' % ('Distribution of QF family sizes', util.occur_dict(sibship_sizes)))
    # plot_hist_num_sibs(num_sibs)

    # ibd = im.index.segment_index.SegmentIndex(os.environ['OBER_OUT'] + '/index_segments')

    # NOTE(review): figure 1 analyzes the sibship WITHOUT 1049 but is titled '... All' and saved
    # as 'hap_colors_poo.png'; figure 2 analyzes the full sibship but is titled '... POOC' and
    # saved as 'hap_colors_all.png'. The two titles look swapped - confirm before relying on them.
    if plot:
        P.figure(1)
    # Sibship that contains sample 1049; raises IndexError if no sibship contains it.
    sibship_1049 = [sibship for sibship in qf_sibships if 1049 in sibship][0]
    without_1049 = set(sibship_1049) - set([1049])
    pa, _ = analyze_family(p, without_1049, max_colors=4,
                           title='Haplotype Coloring: Quasi-Founder Sibs, All, Chrom. %d' % (chrom,),
                           plot=plot, debug=debug)
    if save_plot:
        P.savefig(os.environ['OBER'] + '/doc/poo/qf_family/hap_colors_poo.png')

    if plot:
        P.figure(2)
    with_1049 = set(sibship_1049)
    analyze_family(p, with_1049, max_colors=4,
                   title='Haplotype Coloring: Quasi-Founder Sibs, POOC Chrom. %d' % (chrom,),
                   plot=plot, debug=debug)
    if save_plot:
        P.savefig(os.environ['OBER'] + '/doc/poo/qf_family/hap_colors_all.png')

    if plot:
        P.figure(3)
    nuclear_family = p.find_family(10, 1414)  # 4 children, genotyped parents
    analyze_family(p, nuclear_family.children, max_colors=4,
                   title='Haplotype Coloring: Non-Founder Sibs Chrom. %d' % (chrom,),
                   plot=plot, debug=debug)
    if save_plot:
        P.savefig(os.environ['OBER'] + '/doc/poo/nf_family/hap_colors.png')

    if plot:
        P.show()

    # Summary of the first analysis (QF sibship without 1049)
    print(im.color.hap_color.best_hap_alignment_to_colors(pa))
    print('%s %s' % ('Regions', pa.num_regions))
    print('%s %s' % ('Parental haplotype coverage %', parent_coverage_fraction(pa, p)))
    print('%s %s' % ('Children coverage by parental haplotypes', pa.color_sequence_coverage(np.arange(4))))
Пример #4
0
decide whether to genotype them with a dense or sparse
Illumina chip.   

Created on July 15, 2013
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im, os, numpy as np, matplotlib.pyplot as P

# Load data: the pedigree plus the two phased data sets that are compared below
ped = im.hutt_pedigree()
path = os.environ['OBER_OUT'] + '/kids'
chrom = 22
prefix = path + '/cytosnp/chr%d/cytosnp.imputed' % (chrom, )
illumina = im.io.read_npz(prefix + '.phased.npz')
affy = im.hutt('hutt.phased.npz')

# Large family - with lots of sibs of one of the new Hutt kids
#parents = 246, 389
parents = 288, 465
f = ped.find_family(*parents)

# Compare Illumina, Affy IBD sharing pictures: one figure per data set, saved under
# $OBER/doc/kids with the parent IDs in the file name.
comparisons = (
    (affy, '/doc/kids/family_%d_%d_affy.png'),
    (illumina, '/doc/kids/family_%d_%d_illumina.png'),
)
for figure_number, (problem, png_format) in enumerate(comparisons, 1):
    P.figure(figure_number)
    im.plots.plot_family_comparison(problem, f, 1, xaxis='bp')
    P.savefig(os.environ['OBER'] + png_format % parents)
Пример #5
0
#!/usr/bin/env python
'''
============================================================
Test GERMLINE IBD on 507's ungenotyped family. 

Created on September 15, 2012
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im
import numpy as np

# Sample whose family is phased
i = 507

# The same stage-3 file is loaded twice: p stays untouched as the reference, q is the
# copy handed to the phaser (presumably modified in place - the comparison below relies on it).
p = im.hutt('hutt.stage3.npz')
q = im.hutt('hutt.stage3.npz')
phaser = im.phase_distant.family_sib_comparison_phaser()
params = im.PhaseParam(single_member=i, debug=True)
phaser.run(q, params)

# Print the index arrays of the haplotype entries of sample i that differ after phasing
changed = p.haplotype.data[:, i, :] != q.haplotype.data[:, i, :]
print(np.where(changed))
Пример #6
0
def hutt_ibd_segments(hutt_file, i, ai, j, bj, **kwargs):
    '''Return problem_ibd_segments() for the problem loaded from hutt_file.

    hutt_file is a phasing npz file name relative to the chr22 directory; it is loaded
    with im.hutt(). i, ai, j, bj and all keyword arguments are forwarded unchanged to
    problem_ibd_segments().'''
    problem = im.hutt(hutt_file)
    return problem_ibd_segments(problem, i, ai, j, bj, **kwargs)
Пример #7
0
    print 'Parental haplotype coverage %', parent_coverage_fraction(pa, p)
    print 'Children coverage by parental haplotypes', pa.color_sequence_coverage(np.arange(4))

####################################################################################
if __name__ == '__main__':
    # --------------------------------------------------
    # Main program: analyze the genotyped children of the family of sample 640
    # in the phased problem. The commented-out sections are earlier experiments.
    # --------------------------------------------------
#     parent_coverage = Counter()
#     for chrom in CHROMOSOMES[-1:]:
#         print 'Chromosome', chrom
#         p = im.io.read_npz('%s/phasing/chr%d/hutt.phased.npz' % (os.environ['OBER_OUT'], chrom))
#         # plot_two_families()
#         parent_coverage_chrom = qf_families_parent_coverage(p)
#         parent_coverage += parent_coverage_chrom 
#         print parent_coverage_chrom
#     parents = set(x[0] for x in parent_coverage.iterkeys())
#     a = [(b, (parent_coverage[(b, 0)] + parent_coverage[(b, 1)]) / (2.*sum(ChromDao.TOTAL_BP_TYPED[-1:]))) for b in set(x[0] for x in parent_coverage.iterkeys())]
#    print a

    p = im.hutt('hutt.phased.npz')

    # pa, segments = analyze_family(p, np.setdiff1d(im.gt.genotyped_children(p, f), [1069]))#, debug=True)
    # f = p.find_family_by_child(998, genotyped=False)
    # pa, segments = analyze_family(p, im.gt.genotyped_children(p, f))

    # Family looked up through child 640 (genotyped=False is passed through to the lookup)
    f = p.find_family_by_child(640, genotyped=False)
    genotyped_sibs = im.gt.genotyped_children(p, f)
    pa, segments = analyze_family(p, genotyped_sibs)
Пример #8
0
#!/usr/bin/env python
"""
============================================================
Imputation test - chromosome 22.

Created on February 4, 2013
@author: Oren Livne <*****@*****.**>
============================================================
"""
import impute as im, numpy as np, os

OBER = os.environ["OBER"]
segment_file = OBER + "/out/segments.out"
rare_file = OBER + "/data/impute/rare/rare.npz"

# Phased problem, its IBD segment set and the set of SNPs to be imputed
p = im.hutt("hutt.phased.npz")
ibd = im.smart_segment_set.SmartSegmentSet.load(p.pedigree.num_genotyped, segment_file)
t = im.imputation.ImputationSet.from_file(p.pedigree, rare_file)  # @UndefinedVariable

# SNP list out of all SNPs in t to impute: indices of the chromosome-22 entries
on_chrom_22 = t.snp["chrom"] == 22
snps = np.where(on_chrom_22)[0]
im.imputation.iibd.impute(p.haplotype, ibd, t, snp=snps, debug=False)
Пример #9
0
Plot the recombination rate lambda=lambda(f) where f is
the inbreeding coefficient. Per discussion with Mark Abney
on IBD HMM for haplotypes.  

Created on January 23, 2013
@author: Oren Livne <*****@*****.**>
============================================================
'''
import matplotlib.pyplot as P, impute as im, os

lam = im.hap_lambda.lambda_vs_f()
# Bin lambda, f and calculate mean and stddev of each bin so that we can see trends
F, L, S = im.hap_lambda.lambda_mean(lam)

out_dir = os.environ['OBER'] + '/doc/ibd'

# Figure 1: lambda vs. f
P.close(1)
P.figure(1)
im.hap_lambda.plot_lambda_vs_f(lam)
# P.title('Recombination Rate vs. Inbreeding in the Hutterities')
# Save BEFORE showing: with a blocking backend the figure is closed when show() returns,
# and a savefig() issued afterwards would write a new, empty figure.
P.savefig(out_dir + '/lambda_vs_f.eps')
P.show()

# Figure 2: lambda std dev vs. mean lambda in family children
P.close(2)
problem = im.hutt('hutt.npz')
P.figure(2)
im.hap_lambda.plot_lambda_std(problem)
# Same ordering as above: write the file first, then open the window.
P.savefig(out_dir + '/lambda_child.eps')
P.show()
Пример #10
0
if __name__ == '__main__':
    # --------------------------------------------------
    # Main program: plot the number of IBD pairs covering a SNP region against the
    # region's length in cM (log-scale y axis) and save the figure.
    # --------------------------------------------------
    out_dir = os.environ['OBER'] + '/doc/imputation/validation/ibd-optimization'

    # Data format: ((region start snp, region stop snp), # IBD pairs in 1415x1415 IBD set)
    # Data obtained manually with commands like
    #i=50; j=3168; cat segments.out | awk -v i=$i -vj=$j '($1 <= i) && (j <= $2)' | wc -l
    # (or into file and then wc -l)

    pairs = [((50, 3168), 1428), ((550, 2650), 7554), ((1050, 2150), 34630), ((1300, 1900), 62017), ((1425, 1775), 94152)]

    p = im.hutt('hutt.npz')
    cm = p.info.snp['dist_cm']
    chrom = p.info.snp['chrom'][0]

    # Region length = dist_cm at the stop SNP minus dist_cm at the start SNP
    lengths_cm = [cm[stop] - cm[start] for (start, stop), _ in pairs]
    num_pairs = [count for _, count in pairs]

    P.figure(1)
    P.clf()
    P.semilogy(lengths_cm, num_pairs, 'bo-')
    P.grid(True)
    P.title('Chromosome %d' % (chrom,))
    P.xlabel('Region Length [cM]')
    P.ylabel('# IBD Pairs')
    # Save BEFORE showing: with a blocking backend the figure is closed when show() returns,
    # and a savefig() issued afterwards would write a new, empty figure.
    P.savefig('%s/num_ibd_pairs.chr%d.png' % (out_dir, chrom,))
    P.show()
Пример #11
0
#!/usr/bin/env python
'''
============================================================
Test GERMLINE IBD on 507's ungenotyped family. 

Created on September 15, 2012
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im
import numpy as np

# Sample whose family is phased
i = 507

# The same stage-3 file is loaded twice: p stays untouched as the reference, q is the
# copy handed to the phaser (presumably modified in place - the comparison below relies on it).
p = im.hutt('hutt.stage3.npz')
q = im.hutt('hutt.stage3.npz')
phaser = im.phase_distant.family_sib_comparison_phaser()
params = im.PhaseParam(single_member=i, debug=True)
phaser.run(q, params)

# Print the index arrays of the haplotype entries of sample i that differ after phasing
changed = p.haplotype.data[:, i, :] != q.haplotype.data[:, i, :]
print(np.where(changed))
Пример #12
0
'''
============================================================
Plot phasing % after the different phasing stages. 

Created on August 16, 2012
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im, matplotlib.pyplot as P, numpy as np, os
from numpy.core.function_base import linspace

P.figure(1)
P.clf()
# NOTE(review): pyplot.hold() was deprecated in matplotlib 2.0 and removed in 3.0 - confirm
# the matplotlib version this script targets before upgrading.
P.hold(True)

# Fill-fraction curves after stage 5 (blue) and stage 6 (red)
p = im.hutt('hutt.phased.npz')
d5 = im.plots.plot_fill_fraction(p, color='b', label='Stage 5')
p = im.hutt('hutt.stage6.npz')
d = im.plots.plot_fill_fraction(p, color='r', label='Stage 6')

# Axis limits: x stops one past the first stage-6 row whose column 1 exceeds `zoom`;
# y starts at 95% of the smaller of the two curves' first column-1 values.
zoom = 0.96
ticks = 10
stage6_values = d[:, 1]
min_y = 0.95 * min(stage6_values[0], d5[:, 1][0])
max_x = np.where(stage6_values > zoom)[0][0]
P.xlim([0, max_x + 1])
P.ylim([min_y, 1.0])

# Evenly spaced y ticks labelled with three decimals
yticks = np.linspace(min_y, 1.0, ticks)
tick_labels = ['%.3f' % (tick,) for tick in yticks]
P.yticks(yticks, tick_labels)

P.title('Hutterites Phasing Coverage, Chromosome 22')
Пример #13
0
#!/usr/bin/env python
'''
============================================================
Find Lung function study ID sublist of the entire Hutterites
problem set ID list. Output indices into the latter.

Created on Feb 18, 2013
@author: Oren Livne <*****@*****.**>
============================================================
'''
import sys, os, numpy as np, impute as im
#from optparse import OptionParser

####################################################################################
if __name__ == '__main__':
    # --------------------------------------------------
    # Main program: write to stdout the indices, within the genotyped samples of the
    # 'hutt.stage5.npz' problem, of the samples that are also genotyped in the lung pedigree.
    # --------------------------------------------------
    d = os.environ['OBER_DATA'] + '/lung'
    s = im.io_pedigree.read(d + '/hutt-lung.pdg.tfam',
                            genotyped_id_file=d + '/hutt-lung-samples.txt')
    p = im.hutt('hutt.stage5.npz')

    # Build the lung ID lookup once. The original re-sliced s.sample_id and scanned it
    # linearly for every problem sample, and reused the name `i` for both the loop
    # variable and the resulting mask.
    lung_ids = set(s.sample_id[0:s.num_genotyped])
    in_lung_study = np.array([
        sample_id in lung_ids
        for sample_id in p.pedigree.sample_id[0:p.pedigree.num_genotyped]
    ])
    # One index per line, in increasing order
    np.savetxt(sys.stdout, np.where(in_lung_study)[0], fmt='%d')
Пример #14
0
def hutt_ibd_segments(hutt_file, i, ai, j, bj, **kwargs):
    '''Return problem_ibd_segments() for the problem loaded from hutt_file.

    hutt_file is a phasing npz file name relative to the chr22 directory; it is loaded
    with im.hutt(). i, ai, j, bj and all keyword arguments are forwarded unchanged to
    problem_ibd_segments().'''
    problem = im.hutt(hutt_file)
    return problem_ibd_segments(problem, i, ai, j, bj, **kwargs)