示例#1
0
def get_child_ids(path):
    '''Load all new Hutt kid IDs from the ID files under the path `path` and classify them.

    Reads path + '/ids/parents.csv' (the first row is skipped) as an integer table whose
    column 0 is the kid ID and whose columns 1 and 2 are the IDs of the kid's two parents.
    Prints the number of kids read, followed by the size of each of these groups:

    * both parents are in the pedigree and genotyped;
    * both parents are in the pedigree, but they are not both genotyped;
    * the parents are not both in the pedigree.

    Returns an array with one row (kid id, pedigree node of the column-2 parent, pedigree
    node of the column-1 parent) per kid of the first group. The result always has three
    columns, even when no kid qualifies, so callers can slice columns unconditionally.

    NOTE(review): this function was originally documented as returning
    (kid id, father index, mother index). That holds only if column 2 of parents.csv is
    the father and column 1 the mother - confirm against the file format.'''
    # ndmin=2 keeps a file with a single data row as a 1 x ncol table rather than a flat vector.
    m = np.loadtxt(path + '/ids/parents.csv', dtype=int, skiprows=1, ndmin=2)
    p = im.hutt_pedigree()
    node_of = p.node_of
    # Single-argument print(...) with %-formatting gives the same output on Python 2 and 3.
    print("Untyped kids in Michelle's study %d" % (m.shape[0],))

    # Classify every kid exactly once instead of re-filtering the table for each group.
    ids, untyped_parents, parents_not_in_pedigree = [], [], []
    for x in m:
        kid, parent1, parent2 = x[0], x[1], x[2]
        if not (parent1 in node_of and parent2 in node_of):
            parents_not_in_pedigree.append(kid)
        elif p.is_genotyped(node_of[parent1]) and p.is_genotyped(node_of[parent2]):
            # Column order of the original implementation: column-2 parent first.
            ids.append((kid, node_of[parent2], node_of[parent1]))
        else:
            untyped_parents.append(kid)

    print('Kids with genotyped parents %d' % (len(ids),))
    print('Kids with untyped parents that appear in the pedigree %d %r'
          % (len(untyped_parents), untyped_parents))
    print('Kids with whose parents are not both in the pedigree %d %r'
          % (len(parents_not_in_pedigree), parents_not_in_pedigree))
    # reshape is a no-op for a non-empty result and turns an empty one into shape (0, 3).
    return np.array(ids).reshape(-1, 3)
示例#2
0
 def __init__(self, file_name, affy_bim, genotyped_id_file, debug=False):
     '''Set up the sample partition, the affy SNP names and the sample IDs, then load the
     statistics of file_name into self.data.

     file_name         - passed to the private __stats_struct() loader.
     affy_bim          - file whose column 1 (0-based) holds the affy SNP names.
     genotyped_id_file - passed to read_sample_id().
     debug             - stored as self.debug.'''
     pedigree = im.hutt_pedigree()
     quasi_founders = pedigree.quasi_founders
     self.qf = quasi_founders
     # Sample indices 0..num_genotyped-1 that are not quasi-founders
     genotyped = xrange(pedigree.num_genotyped)
     self.non_qf = np.setdiff1d(genotyped, quasi_founders)
     self.debug = debug
     # Distinct affy SNP names
     snp_names = np.loadtxt(affy_bim, usecols=[1], dtype=str)
     self.affy = set(snp_names)
     self.sample_id = read_sample_id(genotyped_id_file)
     # Load data last and cache its statistics in the data field
     # (the loader may rely on the attributes set above - TODO confirm)
     self.data = self.__stats_struct(file_name)
示例#3
0
 def __init__(self, file_name, affy_bim, genotyped_id_file, debug=False):
     '''Initialize from the Hutt pedigree and the given input files.

     Stores the pedigree's quasi-founders (self.qf), the remaining genotyped sample
     indices (self.non_qf), the debug flag, the set of SNP names read from column 1
     (0-based) of affy_bim (self.affy), the result of read_sample_id(genotyped_id_file)
     (self.sample_id), and finally the statistics structure built from file_name
     (self.data).'''
     hutt = im.hutt_pedigree()
     all_genotyped = xrange(hutt.num_genotyped)
     self.qf = hutt.quasi_founders
     # Complement of the quasi-founders within the genotyped samples
     self.non_qf = np.setdiff1d(all_genotyped, hutt.quasi_founders)
     self.debug = debug

     # Affy SNP names, de-duplicated
     affy_names = np.loadtxt(affy_bim, usecols=[1], dtype=str)
     self.affy = set(affy_names)

     self.sample_id = read_sample_id(genotyped_id_file)

     # Statistics are computed once here and cached in the data field
     self.data = self.__stats_struct(file_name)
示例#4
0
def get_child_ids(path):
    '''Load all new Hutt kid IDs from the ID files under the path `path` and classify them.

    Reads path + '/ids/parents.csv' (the first row is skipped) as an integer table whose
    column 0 is the kid ID and whose columns 1 and 2 are the IDs of the kid's two parents.
    Prints the number of kids read, followed by the size of each of these groups:

    * both parents are in the pedigree and genotyped;
    * both parents are in the pedigree, but they are not both genotyped;
    * the parents are not both in the pedigree.

    Returns an array with one row (kid id, pedigree node of the column-2 parent, pedigree
    node of the column-1 parent) per kid of the first group. The result always has three
    columns, even when no kid qualifies, so callers can slice columns unconditionally.

    NOTE(review): this function was originally documented as returning
    (kid id, father index, mother index). That holds only if column 2 of parents.csv is
    the father and column 1 the mother - confirm against the file format.'''
    # ndmin=2 keeps a file with a single data row as a 1 x ncol table rather than a flat vector.
    m = np.loadtxt(path + '/ids/parents.csv', dtype=int, skiprows=1, ndmin=2)
    p = im.hutt_pedigree()
    node_of = p.node_of
    # Single-argument print(...) with %-formatting gives the same output on Python 2 and 3.
    print("Untyped kids in Michelle's study %d" % (m.shape[0],))

    # One pass over the table: each kid lands in exactly one of the three groups.
    ids, untyped_parents, parents_not_in_pedigree = [], [], []
    for x in m:
        kid, parent1, parent2 = x[0], x[1], x[2]
        if not (parent1 in node_of and parent2 in node_of):
            parents_not_in_pedigree.append(kid)
            continue
        node1, node2 = node_of[parent1], node_of[parent2]
        if p.is_genotyped(node1) and p.is_genotyped(node2):
            # Column order of the original implementation: column-2 parent first.
            ids.append((kid, node2, node1))
        else:
            untyped_parents.append(kid)

    print('Kids with genotyped parents %d' % (len(ids),))
    print('Kids with untyped parents that appear in the pedigree %d %r'
          % (len(untyped_parents), untyped_parents))
    print('Kids with whose parents are not both in the pedigree %d %r'
          % (len(parents_not_in_pedigree), parents_not_in_pedigree))
    # reshape is a no-op for a non-empty result and turns an empty one into shape (0, 3).
    return np.array(ids).reshape(-1, 3)
示例#5
0
 def __init__(self, chrom, pedigree=None, genotyped_id_file=None, ibd_index_location=None, params=None):
     '''Load data and initialize for chromosome number chrom.

     chrom              - chromosome number, forwarded to self._init_chrom().
     pedigree           - if truthy, read with im.io_pedigree.read(); otherwise
                          im.hutt_pedigree() is used.
     genotyped_id_file  - forwarded to im.io_pedigree.read(); unused when pedigree is falsy.
     ibd_index_location - argument of the SegmentIndex constructor; when falsy,
                          $OBER_OUT/index_segments is used instead.
     params             - stored as self.params; when falsy, a new im.param.PhaseParam()
                          is created.'''
     if params:
         self.params = params
     else:
         self.params = im.param.PhaseParam()

     # TODO: replace this hard-coded value by pedigree location parameter passing
     if pedigree:
         self.ped = im.io_pedigree.read(pedigree, genotyped_id_file=genotyped_id_file)
     else:
         self.ped = im.hutt_pedigree()

     # The environment variable is only consulted when no index location was passed in
     index_location = ibd_index_location
     if not index_location:
         index_location = os.environ['OBER_OUT'] + '/index_segments'
     self.ibd = im.index.segment_index.SegmentIndex(index_location)
     self.num_snps = len(self.ibd._snp)

     # Quasi-founder = genotyped sample all of whose graph predecessors have an index
     # >= num_genotyped (trivially true for a sample without predecessors).
     # NOTE(review): this used to be described as "either parent is not genotyped";
     # the code requires it of every parent - confirm which one is intended.
     num_genotyped = self.ped.num_genotyped
     parents_of = self.ped.graph.predecessors
     is_quasi_founder = [
         all(parent >= num_genotyped for parent in parents_of(sample))
         for sample in xrange(num_genotyped)
     ]
     self.quasi_founders = np.where(is_quasi_founder)[0]
     self.quasi_founders_set = set(self.quasi_founders)
     self._init_chrom(chrom)
示例#6
0
#!/usr/bin/env python
'''
============================================================
Estimate Hutterite kid imputation rates so that Carole can
decide whether to genotype them with a dense or sparse
Illumina chip.   

Created on July 15, 2013
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im, os, numpy as np, matplotlib.pyplot as P

# Load data. Requires the OBER_OUT environment variable (KeyError if it is unset).
ped = im.hutt_pedigree()
path = os.environ['OBER_OUT'] + '/kids'
chrom = 22
# '%' binds tighter than '+', so the chromosome number is formatted into the literal
# before it is appended to path.
prefix = path + '/cytosnp/chr%d/cytosnp.imputed' % (chrom, )
# NOTE(review): illumina is loaded but not used by the statements visible here; only
# the affy data set is plotted below.
illumina = im.io.read_npz(prefix + '.phased.npz')
affy = im.hutt('hutt.phased.npz')

# Large family - with lots of sibs of one of the new Hutt kids
# Alternative family that was tried: parents = 246, 389
parents = 288, 465
f = ped.find_family(parents[0], parents[1])

# Compare Illumina, Affy IBD sharing pictures (only the affy picture is drawn here).
P.figure(1)
im.plots.plot_family_comparison(affy, f, 1, xaxis='bp')
# Requires the OBER environment variable; the two parent IDs become part of the file name.
P.savefig(os.environ['OBER'] + '/doc/kids/family_%d_%d_affy.png' % parents)
示例#7
0
def get_sib_ids(path):
    '''Return kid_sibs() of the Hutt pedigree and the parent columns (every column but
    the first) of the array returned by get_child_ids(path).'''
    pedigree = im.hutt_pedigree()
    # Column 0 is the kid ID; the remaining columns identify the kid's parents
    parent_columns = get_child_ids(path)[:, 1:]
    return kid_sibs(pedigree, parent_columns)
示例#8
0
文件: poo.py 项目: orenlivne/ober
 def __init__(self, chrom, pedigree=None, genotyped_id_file=None, ibd_index_location=None, params=None):
     '''Load data and initialize for chromosome number chrom.

     Falsy optional arguments fall back to defaults: params to a new
     im.param.PhaseParam(), pedigree to im.hutt_pedigree(), and ibd_index_location to
     $OBER_OUT/index_segments. genotyped_id_file is only used when pedigree is given.
     chrom is forwarded to self._init_chrom() once everything else is set up.'''
     self.params = params or im.param.PhaseParam()

     # TODO: replace this hard-coded value by pedigree location parameter passing
     if pedigree:
         self.ped = im.io_pedigree.read(pedigree, genotyped_id_file=genotyped_id_file)
     else:
         self.ped = im.hutt_pedigree()

     # OBER_OUT is only looked up when no explicit index location is supplied
     segment_index_path = ibd_index_location or os.environ['OBER_OUT'] + '/index_segments'
     self.ibd = im.index.segment_index.SegmentIndex(segment_index_path)
     self.num_snps = len(self.ibd._snp)

     # Quasi-founder = genotyped sample whose graph predecessors all have an index
     # >= num_genotyped (this includes samples that have no predecessors at all).
     # NOTE(review): the previous comment read "either parent of his/her is not
     # genotyped", which does not match the all() test - confirm the intended rule.
     ped = self.ped
     flags = []
     for sample in xrange(ped.num_genotyped):
         flags.append(all((parent >= ped.num_genotyped) for parent in ped.graph.predecessors(sample)))
     self.quasi_founders = np.where(flags)[0]
     self.quasi_founders_set = set(self.quasi_founders)
     self._init_chrom(chrom)
示例#9
0
文件: outlier.py 项目: orenlivne/ober
    ax = P.axes()
    rects1 = ax.bar(ind, k0, width=width, color='b')
    rects2 = ax.bar(ind + width, k1, width=width, color='r')
    
    # add some
    ax.set_ylabel('Genomic Fraction of IBD Sharing')
    ax.set_title('Sample IBD Sharing Comparison')
    ax.set_xticks(ind + width)
    ax.set_xticklabels(['(%d,%d)\n%d' % (x[0][0], x[0][1], x[1]) for x in labels])
    ax.legend((rects1[0], rects2[0]), ('Sample 80', 'Sample 248'))
#    autolabel(ax, rects1)
#    autolabel(ax, rects2)
    P.show()

####################################################################################
if __name__ == '__main__':
    # Sum of column 3 (0-based) of the chromosome table.
    # NOTE(review): total and ped are not passed to any call below; presumably the
    # helper functions read them as module globals - confirm before renaming.
    total = sum(np.loadtxt('/home/oren/ober/testdata/misc/chromosome.txt', usecols=[3]))
    ped = im.hutt_pedigree()
    np.set_printoptions(linewidth=100)

    # The two samples being compared
    samples = (80, 248)
    r0 = closest_relatives(samples[0])
    r1 = closest_relatives(samples[1])

    # One figure per (num argument of bar_data, output file) pair.
    # NOTE(review): if plot_ibd_sharing() calls P.show() before returning, the savefig()
    # that follows may write an empty figure with some backends - confirm.
    for num_requested, figure_file in (
            (50, '/home/oren/ober/doc/imputation/validation/affy/ibd_comparison.png'),
            (8, '/home/oren/ober/doc/imputation/validation/affy/ibd_comparison_top.png')):
        k0, k1, labels, d = bar_data(r0, r1, num=num_requested)
        plot_ibd_sharing(k0, k1, labels)
        P.savefig(figure_file)
        
示例#10
0
def get_sib_ids(path):
    '''Look up the new kids under path with get_child_ids() and return the result of
    kid_sibs() for the Hutt pedigree and the kids' parent columns.'''
    hutt = im.hutt_pedigree()
    child_table = get_child_ids(path)
    # Drop the kid ID column; only the parent columns are passed on
    return kid_sibs(hutt, child_table[:, 1:])
示例#11
0
Created on March 17, 2014
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im, numpy as np, os, matplotlib.pyplot as P  # , db_gene
from scipy import stats
from matplotlib import rc
# Plot fonts: sans-serif Helvetica, with text rendered through TeX
rc('text', usetex=True)
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})

#---------------------------------------------
# Constants
#---------------------------------------------
# Number of samples (num_genotyped of the Hutt pedigree).
n = im.hutt_pedigree().num_genotyped
# Converts detailed to condensed by multiplying by it on the right:
# a 15 x 9 zero matrix with A[k, S[k]] = 1 for every index k of S.
S = im.poo.idcoef.S
A = np.matrix(np.zeros((15, 9)))
A[np.arange(len(S)), S] = 1


#---------------------------------------------
# Methods
#---------------------------------------------
def row(i, j):
    '''Return the Delta array row of the sample pair (i,j), i.e. i*n + j, where n is the
    module-level number of samples.'''
    first_row_of_i = i * n
    return first_row_of_i + j


def ind(r):
示例#12
0
Created on March 17, 2014
@author: Oren Livne <*****@*****.**>
============================================================
'''
import impute as im, numpy as np, os, matplotlib.pyplot as P  # , db_gene
from scipy import stats
from matplotlib import rc
# Plot fonts: sans-serif Helvetica, with text rendered through TeX
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
rc('text', usetex=True)

#---------------------------------------------
# Constants
#---------------------------------------------
# Number of samples (num_genotyped of the Hutt pedigree).
n = im.hutt_pedigree().num_genotyped

# Converts detailed to condensed by multiplying by it on the right:
# starts as a 15 x 9 zero matrix; entry (k, S[k]) is set to 1 for every index k of S.
S = im.poo.idcoef.S
A = np.matrix(np.zeros((15, 9)))
A[np.arange(len(S)), S] = 1

#---------------------------------------------
# Methods
#---------------------------------------------
def row(i, j):
    '''Delta array row corresponding to the sample pair (i,j): j plus i times the
    module-level number of samples n.'''
    row_index = i * n
    row_index = row_index + j
    return row_index

def ind(r):
    '''Sample pair (i,j) of row r.'''